=================================================================== RCS file: /cvs/mandoc/mandocdb.c,v retrieving revision 1.7 retrieving revision 1.13 diff -u -p -r1.7 -r1.13 --- mandoc/mandocdb.c 2011/11/13 00:53:13 1.7 +++ mandoc/mandocdb.c 2011/11/26 22:38:11 1.13 @@ -1,6 +1,7 @@ -/* $Id: mandocdb.c,v 1.7 2011/11/13 00:53:13 schwarze Exp $ */ +/* $Id: mandocdb.c,v 1.13 2011/11/26 22:38:11 schwarze Exp $ */ /* * Copyright (c) 2011 Kristaps Dzonsons + * Copyright (c) 2011 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -38,32 +39,19 @@ #include "man.h" #include "mdoc.h" #include "mandoc.h" +#include "mandocdb.h" +#include "manpath.h" -#define MANDOC_DB "mandoc.db" -#define MANDOC_IDX "mandoc.index" #define MANDOC_BUFSZ BUFSIZ #define MANDOC_SLOP 1024 -/* Bit-fields. See mandocdb.8. */ - -#define TYPE_NAME 0x01 -#define TYPE_FUNCTION 0x02 -#define TYPE_UTILITY 0x04 -#define TYPE_INCLUDES 0x08 -#define TYPE_VARIABLE 0x10 -#define TYPE_STANDARD 0x20 -#define TYPE_AUTHOR 0x40 -#define TYPE_CONFIG 0x80 -#define TYPE_DESC 0x100 -#define TYPE_XREF 0x200 -#define TYPE_PATH 0x400 -#define TYPE_ENV 0x800 -#define TYPE_ERR 0x1000 - /* Tiny list for files. No need to bring in QUEUE. 
*/ struct of { char *fname; /* heap-allocated */ + char *sec; + char *arch; + char *title; struct of *next; /* NULL for last one */ struct of *first; /* first in list */ }; @@ -100,18 +88,20 @@ static void buf_append(struct buf *, const char *); static void buf_appendb(struct buf *, const void *, size_t); static void dbt_put(DB *, const char *, DBT *, DBT *); -static void hash_put(DB *, const struct buf *, int); +static void hash_put(DB *, const struct buf *, uint64_t); static void hash_reset(DB **); static void index_merge(const struct of *, struct mparse *, struct buf *, struct buf *, DB *, DB *, const char *, - DB *, const char *, int, + DB *, const char *, int, int, recno_t, const recno_t *, size_t); static void index_prune(const struct of *, DB *, const char *, DB *, const char *, int, recno_t *, recno_t **, size_t *); -static void ofile_argbuild(char *[], int, int, struct of **); -static int ofile_dirbuild(const char *, int, struct of **); +static void ofile_argbuild(char *[], int, int, int, + struct of **); +static int ofile_dirbuild(const char *, const char *, + const char *, int, int, struct of **); static void ofile_free(struct of *); static int pman_node(MAN_ARGS); static void pmdoc_node(MDOC_ARGS); @@ -264,21 +254,23 @@ int main(int argc, char *argv[]) { struct mparse *mp; /* parse sequence */ + struct manpaths dirs; enum op op; /* current operation */ const char *dir; char ibuf[MAXPATHLEN], /* index fname */ fbuf[MAXPATHLEN]; /* btree fname */ int verb, /* output verbosity */ + use_all, /* use all directories and files */ ch, i, flags; DB *idx, /* index database */ *db, /* keyword database */ *hash; /* temporary keyword hashtable */ BTREEINFO info; /* btree configuration */ - recno_t maxrec; /* supremum of all records */ - recno_t *recs; /* buffer of empty records */ + recno_t maxrec; /* last record number in the index */ + recno_t *recs; /* the numbers of all empty records */ size_t sz1, sz2, - recsz, /* buffer size of recs */ - reccur; /* valid 
number of recs */ + recsz, /* number of allocated slots in recs */ + reccur; /* current number of empty records */ struct buf buf, /* keyword buffer */ dbuf; /* description buffer */ struct of *of; /* list of files for processing */ @@ -291,7 +283,10 @@ main(int argc, char *argv[]) else ++progname; + memset(&dirs, 0, sizeof(struct manpaths)); + verb = 0; + use_all = 0; of = NULL; db = idx = NULL; mp = NULL; @@ -302,8 +297,11 @@ main(int argc, char *argv[]) op = OP_NEW; dir = NULL; - while (-1 != (ch = getopt(argc, argv, "d:u:v"))) + while (-1 != (ch = getopt(argc, argv, "ad:u:v"))) switch (ch) { + case ('a'): + use_all = 1; + break; case ('d'): dir = optarg; op = OP_UPDATE; @@ -360,7 +358,7 @@ main(int argc, char *argv[]) if (NULL == db) { perror(fbuf); exit((int)MANDOCLEVEL_SYSERR); - } else if (NULL == db) { + } else if (NULL == idx) { perror(ibuf); exit((int)MANDOCLEVEL_SYSERR); } @@ -370,7 +368,7 @@ main(int argc, char *argv[]) printf("%s: Opened\n", ibuf); } - ofile_argbuild(argv, argc, verb, &of); + ofile_argbuild(argv, argc, use_all, verb, &of); if (NULL == of) goto out; @@ -380,36 +378,56 @@ main(int argc, char *argv[]) &maxrec, &recs, &recsz); if (OP_UPDATE == op) - index_merge(of, mp, &dbuf, &buf, hash, - db, fbuf, idx, ibuf, verb, - maxrec, recs, reccur); + index_merge(of, mp, &dbuf, &buf, hash, + db, fbuf, idx, ibuf, use_all, + verb, maxrec, recs, reccur); goto out; } - for (i = 0; i < argc; i++) { + /* + * Configure the directories we're going to scan. + * If we have command-line arguments, use them. + * If not, we use man(1)'s method (see mandocdb.8). 
+ */ + + if (argc > 0) { + dirs.paths = mandoc_malloc(argc * sizeof(char *)); + dirs.sz = argc; + for (i = 0; i < argc; i++) + dirs.paths[i] = mandoc_strdup(argv[i]); + } else + manpath_parse(&dirs, NULL, NULL); + + for (i = 0; i < dirs.sz; i++) { ibuf[0] = fbuf[0] = '\0'; - strlcat(fbuf, argv[i], MAXPATHLEN); + strlcat(fbuf, dirs.paths[i], MAXPATHLEN); strlcat(fbuf, "/", MAXPATHLEN); sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN); - strlcat(ibuf, argv[i], MAXPATHLEN); + strlcat(ibuf, dirs.paths[i], MAXPATHLEN); strlcat(ibuf, "/", MAXPATHLEN); sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN); if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) { - fprintf(stderr, "%s: Path too long\n", argv[i]); + fprintf(stderr, "%s: Path too long\n", + dirs.paths[i]); exit((int)MANDOCLEVEL_BADARG); } + if (db) + (*db->close)(db); + if (idx) + (*idx->close)(idx); + db = dbopen(fbuf, flags, 0644, DB_BTREE, &info); idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL); if (NULL == db) { perror(fbuf); exit((int)MANDOCLEVEL_SYSERR); - } else if (NULL == db) { + } else if (NULL == idx) { perror(ibuf); exit((int)MANDOCLEVEL_SYSERR); } @@ -422,7 +440,8 @@ main(int argc, char *argv[]) ofile_free(of); of = NULL; - if ( ! ofile_dirbuild(argv[i], verb, &of)) + if ( ! 
ofile_dirbuild(dirs.paths[i], NULL, NULL, + use_all, verb, &of)) exit((int)MANDOCLEVEL_SYSERR); if (NULL == of) @@ -430,8 +449,9 @@ main(int argc, char *argv[]) of = of->first; - index_merge(of, mp, &dbuf, &buf, hash, db, fbuf, - idx, ibuf, verb, maxrec, recs, reccur); + index_merge(of, mp, &dbuf, &buf, hash, db, fbuf, + idx, ibuf, use_all, verb, + maxrec, recs, reccur); } out: @@ -444,6 +464,7 @@ out: if (mp) mparse_free(mp); + manpath_free(&dirs); ofile_free(of); free(buf.cp); free(dbuf.cp); @@ -455,8 +476,8 @@ out: void index_merge(const struct of *of, struct mparse *mp, struct buf *dbuf, struct buf *buf, - DB *hash, DB *db, const char *dbf, - DB *idx, const char *idxf, int verb, + DB *hash, DB *db, const char *dbf, + DB *idx, const char *idxf, int use_all, int verb, recno_t maxrec, const recno_t *recs, size_t reccur) { recno_t rec; @@ -467,7 +488,7 @@ index_merge(const struct of *of, struct mparse *mp, const char *fn, *msec, *mtitle, *arch; size_t sv; unsigned seq; - char vbuf[8]; + struct db_val vbuf; for (rec = 0; of; of = of->next) { fn = of->fname; @@ -492,17 +513,53 @@ index_merge(const struct of *of, struct mparse *mp, if (NULL == mdoc && NULL == man) continue; + /* + * By default, skip a file if the manual section + * and architecture given in the file disagree + * with the directory where the file is located. + */ + msec = NULL != mdoc ? mdoc_meta(mdoc)->msec : man_meta(man)->msec; - mtitle = NULL != mdoc ? - mdoc_meta(mdoc)->title : man_meta(man)->title; arch = NULL != mdoc ? mdoc_meta(mdoc)->arch : NULL; + if (0 == use_all) { + assert(of->sec); + assert(msec); + if (strcmp(msec, of->sec)) + continue; + + if (NULL == arch) { + if (NULL != of->arch) + continue; + } else if (NULL == of->arch || + strcmp(arch, of->arch)) + continue; + } + if (NULL == arch) arch = ""; /* + * By default, skip a file if the title given + * in the file disagrees with the file name. 
+ * If both agree, use the file name as the title, + * because the one in the file usually is all caps. + */ + + mtitle = NULL != mdoc ? + mdoc_meta(mdoc)->title : man_meta(man)->title; + + assert(of->title); + assert(mtitle); + + if (0 == strcasecmp(mtitle, of->title)) + mtitle = of->title; + else if (0 == use_all) + continue; + + /* * The index record value consists of a nil-terminated * filename, a nil-terminated manual section, and a * nil-terminated description. Since the description @@ -530,17 +587,15 @@ index_merge(const struct of *of, struct mparse *mp, * Copy from the in-memory hashtable of pending keywords * into the database. */ - - memset(vbuf, 0, sizeof(uint32_t)); - memcpy(vbuf + 4, &rec, sizeof(uint32_t)); + vbuf.rec = rec; seq = R_FIRST; while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) { seq = R_NEXT; - memcpy(vbuf, val.data, sizeof(uint32_t)); - val.size = sizeof(vbuf); - val.data = vbuf; + vbuf.mask = *(uint64_t *)val.data; + val.size = sizeof(struct db_val); + val.data = &vbuf; if (verb > 1) printf("%s: Added keyword: %s\n", @@ -585,6 +640,7 @@ index_prune(const struct of *ofile, DB *db, const char { const struct of *of; const char *fn; + struct db_val *vbuf; unsigned seq, sseq; DBT key, val; size_t reccur; @@ -617,8 +673,9 @@ index_prune(const struct of *ofile, DB *db, const char sseq = R_FIRST; while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) { sseq = R_NEXT; - assert(8 == val.size); - if (*maxrec != *(recno_t *)(val.data + 4)) + assert(sizeof(struct db_val) == val.size); + vbuf = val.data; + if (*maxrec != vbuf->rec) continue; if (verb) printf("%s: Deleted keyword: %s\n", @@ -728,7 +785,7 @@ pmdoc_An(MDOC_ARGS) return; buf_appendmdoc(buf, n->child, 0); - hash_put(hash, buf, TYPE_AUTHOR); + hash_put(hash, buf, TYPE_An); } static void @@ -789,7 +846,7 @@ pmdoc_Fd(MDOC_ARGS) buf_appendb(buf, start, (size_t)(end - start + 1)); buf_appendb(buf, "", 1); - hash_put(hash, buf, TYPE_INCLUDES); + hash_put(hash, buf, TYPE_In); } /* ARGSUSED */ 
@@ -801,7 +858,7 @@ pmdoc_Cd(MDOC_ARGS) return; buf_appendmdoc(buf, n->child, 0); - hash_put(hash, buf, TYPE_CONFIG); + hash_put(hash, buf, TYPE_Cd); } /* ARGSUSED */ @@ -815,7 +872,7 @@ pmdoc_In(MDOC_ARGS) return; buf_append(buf, n->child->string); - hash_put(hash, buf, TYPE_INCLUDES); + hash_put(hash, buf, TYPE_In); } /* ARGSUSED */ @@ -841,7 +898,7 @@ pmdoc_Fn(MDOC_ARGS) cp++; buf_append(buf, cp); - hash_put(hash, buf, TYPE_FUNCTION); + hash_put(hash, buf, TYPE_Fn); } /* ARGSUSED */ @@ -855,7 +912,7 @@ pmdoc_St(MDOC_ARGS) return; buf_append(buf, n->child->string); - hash_put(hash, buf, TYPE_STANDARD); + hash_put(hash, buf, TYPE_St); } /* ARGSUSED */ @@ -874,7 +931,7 @@ pmdoc_Xr(MDOC_ARGS) } else buf_appendb(buf, ".", 2); - hash_put(hash, buf, TYPE_XREF); + hash_put(hash, buf, TYPE_Xr); } /* ARGSUSED */ @@ -911,7 +968,7 @@ pmdoc_Vt(MDOC_ARGS) buf_appendb(buf, start, sz); buf_appendb(buf, "", 1); - hash_put(hash, buf, TYPE_VARIABLE); + hash_put(hash, buf, TYPE_Va); } /* ARGSUSED */ @@ -925,7 +982,7 @@ pmdoc_Fo(MDOC_ARGS) return; buf_append(buf, n->child->string); - hash_put(hash, buf, TYPE_FUNCTION); + hash_put(hash, buf, TYPE_Fn); } @@ -940,7 +997,7 @@ pmdoc_Nd(MDOC_ARGS) buf_appendmdoc(dbuf, n->child, 1); buf_appendmdoc(buf, n->child, 0); - hash_put(hash, buf, TYPE_DESC); + hash_put(hash, buf, TYPE_Nd); } /* ARGSUSED */ @@ -952,7 +1009,7 @@ pmdoc_Er(MDOC_ARGS) return; buf_appendmdoc(buf, n->child, 0); - hash_put(hash, buf, TYPE_ERR); + hash_put(hash, buf, TYPE_Er); } /* ARGSUSED */ @@ -964,7 +1021,7 @@ pmdoc_Ev(MDOC_ARGS) return; buf_appendmdoc(buf, n->child, 0); - hash_put(hash, buf, TYPE_ENV); + hash_put(hash, buf, TYPE_Ev); } /* ARGSUSED */ @@ -976,7 +1033,7 @@ pmdoc_Pa(MDOC_ARGS) return; buf_appendmdoc(buf, n->child, 0); - hash_put(hash, buf, TYPE_PATH); + hash_put(hash, buf, TYPE_Pa); } /* ARGSUSED */ @@ -986,7 +1043,7 @@ pmdoc_Nm(MDOC_ARGS) if (SEC_NAME == n->sec) { buf_appendmdoc(buf, n->child, 0); - hash_put(hash, buf, TYPE_NAME); + hash_put(hash, buf, 
TYPE_Nm); return; } else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) return; @@ -995,11 +1052,11 @@ pmdoc_Nm(MDOC_ARGS) buf_append(buf, m->name); buf_appendmdoc(buf, n->child, 0); - hash_put(hash, buf, TYPE_UTILITY); + hash_put(hash, buf, TYPE_Nm); } static void -hash_put(DB *db, const struct buf *buf, int mask) +hash_put(DB *db, const struct buf *buf, uint64_t mask) { DBT key, val; int rc; @@ -1014,10 +1071,10 @@ hash_put(DB *db, const struct buf *buf, int mask) perror("hash"); exit((int)MANDOCLEVEL_SYSERR); } else if (0 == rc) - mask |= *(int *)val.data; + mask |= *(uint64_t *)val.data; val.data = &mask; - val.size = sizeof(int); + val.size = sizeof(uint64_t); if ((rc = (*db->put)(db, &key, &val, 0)) < 0) { perror("hash"); @@ -1125,7 +1182,7 @@ pman_node(MAN_ARGS) buf_appendb(buf, start, sz); buf_appendb(buf, "", 1); - hash_put(hash, buf, TYPE_NAME); + hash_put(hash, buf, TYPE_Nm); if (' ' == start[(int)sz]) { start += (int)sz + 1; @@ -1164,7 +1221,7 @@ pman_node(MAN_ARGS) buf_appendb(dbuf, start, sz); buf_appendb(buf, start, sz); - hash_put(hash, buf, TYPE_DESC); + hash_put(hash, buf, TYPE_Nd); } } @@ -1176,14 +1233,64 @@ pman_node(MAN_ARGS) } static void -ofile_argbuild(char *argv[], int argc, int verb, struct of **of) +ofile_argbuild(char *argv[], int argc, int use_all, int verb, + struct of **of) { + char buf[MAXPATHLEN]; + char *sec, *arch, *title, *p; int i; struct of *nof; for (i = 0; i < argc; i++) { + + /* + * Try to infer the manual section, architecture and + * page title from the path, assuming it looks like + * man*[/<arch>]/<title>.<section> + */ + + if (strlcpy(buf, argv[i], sizeof(buf)) >= sizeof(buf)) { + fprintf(stderr, "%s: Path too long\n", argv[i]); + continue; + } + sec = arch = title = NULL; + p = strrchr(buf, '\0'); + while (p-- > buf) { + if (NULL == sec && '.'
== *p) { + sec = p + 1; + *p = '\0'; + continue; + } + if ('/' != *p) + continue; + if (NULL == title) { + title = p + 1; + *p = '\0'; + continue; + } + if (strncmp("man", p + 1, 3)) + arch = p + 1; + break; + } + if (NULL == title) + title = buf; + + /* + * Build the file structure. + */ + nof = mandoc_calloc(1, sizeof(struct of)); - nof->fname = strdup(argv[i]); + nof->fname = mandoc_strdup(argv[i]); + if (NULL != sec) + nof->sec = mandoc_strdup(sec); + if (NULL != arch) + nof->arch = mandoc_strdup(arch); + nof->title = mandoc_strdup(title); + + /* + * Add the structure to the list. + */ + if (verb > 2) printf("%s: Scheduling\n", argv[i]); if (NULL == *of) { @@ -1206,12 +1313,14 @@ ofile_argbuild(char *argv[], int argc, int verb, struc * Pass in a pointer to a NULL structure for the first invocation. */ static int -ofile_dirbuild(const char *dir, int verb, struct of **of) +ofile_dirbuild(const char *dir, const char* psec, const char *parch, + int use_all, int verb, struct of **of) { char buf[MAXPATHLEN]; size_t sz; DIR *d; - const char *fn; + const char *fn, *sec, *arch; + char *suffix; struct of *nof; struct dirent *dp; @@ -1222,34 +1331,69 @@ ofile_dirbuild(const char *dir, int verb, struct of ** while (NULL != (dp = readdir(d))) { fn = dp->d_name; + + if ('.' == *fn) + continue; + if (DT_DIR == dp->d_type) { - if (0 == strcmp(".", fn)) + sec = psec; + arch = parch; + + /* + * By default, only use directories called: + * man<section>/[<arch>/] + */ + + if (NULL == sec) { + if(0 == strncmp("man", fn, 3)) + sec = fn + 3; + else if (use_all) + sec = fn; + else + continue; + } else if (NULL == arch && (use_all || + NULL == strchr(fn, '.'))) + arch = fn; + else if (0 == use_all) continue; - if (0 == strcmp("..", fn)) - continue; buf[0] = '\0'; strlcat(buf, dir, MAXPATHLEN); strlcat(buf, "/", MAXPATHLEN); sz = strlcat(buf, fn, MAXPATHLEN); - if (sz < MAXPATHLEN) { - if ( ! 
ofile_dirbuild(buf, verb, of)) - return(0); - continue; - } else if (sz < MAXPATHLEN) - continue; + if (MAXPATHLEN <= sz) { + fprintf(stderr, "%s: Path too long\n", dir); + return(0); + } + + if (verb > 2) + printf("%s: Scanning\n", buf); - fprintf(stderr, "%s: Path too long\n", dir); - return(0); + if ( ! ofile_dirbuild(buf, sec, arch, + use_all, verb, of)) + return(0); } - if (DT_REG != dp->d_type) + if (DT_REG != dp->d_type || + (NULL == psec && !use_all) || + !strcmp(MANDOC_DB, fn) || + !strcmp(MANDOC_IDX, fn)) continue; - if (0 == strcmp(MANDOC_DB, fn) || - 0 == strcmp(MANDOC_IDX, fn)) - continue; + /* + * By default, skip files where the file name suffix + * does not agree with the section directory + * they are located in. + */ + suffix = strrchr(fn, '.'); + if (0 == use_all) { + if (NULL == suffix) + continue; + if (strcmp(suffix + 1, psec)) + continue; + } + buf[0] = '\0'; strlcat(buf, dir, MAXPATHLEN); strlcat(buf, "/", MAXPATHLEN); @@ -1261,7 +1405,20 @@ ofile_dirbuild(const char *dir, int verb, struct of ** nof = mandoc_calloc(1, sizeof(struct of)); nof->fname = mandoc_strdup(buf); + if (NULL != psec) + nof->sec = mandoc_strdup(psec); + if (NULL != parch) + nof->arch = mandoc_strdup(parch); + /* + * Remember the file name without the extension, + * to be used as the page title in the database. + */ + + if (NULL != suffix) + *suffix = '\0'; + nof->title = mandoc_strdup(fn); + if (verb > 2) printf("%s: Scheduling\n", buf); @@ -1287,6 +1444,9 @@ ofile_free(struct of *of) while (of) { nof = of->next; free(of->fname); + free(of->sec); + free(of->arch); + free(of->title); free(of); of = nof; }