=================================================================== RCS file: /cvs/mandoc/mandocdb.c,v retrieving revision 1.23 retrieving revision 1.43 diff -u -p -r1.23 -r1.43 --- mandoc/mandocdb.c 2011/12/03 18:47:09 1.23 +++ mandoc/mandocdb.c 2011/12/31 18:47:52 1.43 @@ -1,4 +1,4 @@ -/* $Id: mandocdb.c,v 1.23 2011/12/03 18:47:09 kristaps Exp $ */ +/* $Id: mandocdb.c,v 1.43 2011/12/31 18:47:52 kristaps Exp $ */ /* * Copyright (c) 2011 Kristaps Dzonsons * Copyright (c) 2011 Ingo Schwarze @@ -21,9 +21,9 @@ #include #include -#include #include +#include #include #include #include @@ -55,6 +55,24 @@ #define MANDOC_SRC 0x1 #define MANDOC_FORM 0x2 +/* Access to the mandoc database on disk. */ + +struct mdb { + char idxn[MAXPATHLEN]; /* index db filename */ + char dbn[MAXPATHLEN]; /* keyword db filename */ + DB *idx; /* index recno database */ + DB *db; /* keyword btree database */ +}; + +/* Stack of temporarily unused index records. */ + +struct recs { + recno_t *stack; /* pointer to a malloc'ed array */ + size_t size; /* number of allocated slots */ + size_t cur; /* current number of empty records */ + recno_t last; /* last record number in the index */ +}; + /* Tiny list for files. No need to bring in QUEUE. */ struct of { @@ -78,9 +96,11 @@ struct buf { /* Operation we're going to perform. */ enum op { - OP_NEW = 0, /* new database */ + OP_DEFAULT = 0, /* new dbs from dir list or default config */ + OP_CONFFILE, /* new databases from custom config file */ OP_UPDATE, /* delete/add entries in existing database */ - OP_DELETE /* delete entries from existing database */ + OP_DELETE, /* delete entries from existing database */ + OP_TEST /* change no databases, report potential problems */ }; #define MAN_ARGS DB *hash, \ @@ -103,185 +123,181 @@ static void hash_put(DB *, const struct buf *, uint static void hash_reset(DB **); static void index_merge(const struct of *, struct mparse *, struct buf *, struct buf *, DB *, - DB *, const char *, DB *, const char *, - recno_t, const recno_t *, size_t); -static void index_prune(const struct of *, DB *, - const char *, DB *, const char *, - recno_t *, recno_t **, size_t *); + struct mdb *, struct recs *); +static void index_prune(const struct of *, struct mdb *, + struct recs *); static void ofile_argbuild(int, char *[], struct of **); -static int ofile_dirbuild(const char *, const char *, +static void ofile_dirbuild(const char *, const char *, const char *, int, struct of **); static void ofile_free(struct of *); -static void pformatted(DB *, struct buf *, struct buf *, - const struct of *); +static void pformatted(DB *, struct buf *, + struct buf *, const struct of *); static int pman_node(MAN_ARGS); static void pmdoc_node(MDOC_ARGS); -static void pmdoc_An(MDOC_ARGS); -static void pmdoc_Cd(MDOC_ARGS); -static void pmdoc_Er(MDOC_ARGS); -static void pmdoc_Ev(MDOC_ARGS); -static void pmdoc_Fd(MDOC_ARGS); -static void pmdoc_In(MDOC_ARGS); -static void pmdoc_Fn(MDOC_ARGS); -static void pmdoc_Fo(MDOC_ARGS); -static void pmdoc_Nd(MDOC_ARGS); -static void pmdoc_Nm(MDOC_ARGS); -static void pmdoc_Pa(MDOC_ARGS); -static void pmdoc_St(MDOC_ARGS); -static void pmdoc_Vt(MDOC_ARGS); -static void pmdoc_Xr(MDOC_ARGS); -static void usage(void); +static int pmdoc_head(MDOC_ARGS); +static int pmdoc_body(MDOC_ARGS); +static int pmdoc_Fd(MDOC_ARGS); +static int pmdoc_In(MDOC_ARGS); +static int pmdoc_Fn(MDOC_ARGS); +static int pmdoc_Nd(MDOC_ARGS); +static int pmdoc_Nm(MDOC_ARGS); +static int pmdoc_Sh(MDOC_ARGS); +static int pmdoc_St(MDOC_ARGS); +static int pmdoc_Xr(MDOC_ARGS); -typedef void (*pmdoc_nf)(MDOC_ARGS); +#define MDOCF_CHILD 0x01 /* Automatically index child nodes. */ -static const pmdoc_nf mdocs[MDOC_MAX] = { - NULL, /* Ap */ - NULL, /* Dd */ - NULL, /* Dt */ - NULL, /* Os */ - NULL, /* Sh */ - NULL, /* Ss */ - NULL, /* Pp */ - NULL, /* D1 */ - NULL, /* Dl */ - NULL, /* Bd */ - NULL, /* Ed */ - NULL, /* Bl */ - NULL, /* El */ - NULL, /* It */ - NULL, /* Ad */ - pmdoc_An, /* An */ - NULL, /* Ar */ - pmdoc_Cd, /* Cd */ - NULL, /* Cm */ - NULL, /* Dv */ - pmdoc_Er, /* Er */ - pmdoc_Ev, /* Ev */ - NULL, /* Ex */ - NULL, /* Fa */ - pmdoc_Fd, /* Fd */ - NULL, /* Fl */ - pmdoc_Fn, /* Fn */ - NULL, /* Ft */ - NULL, /* Ic */ - pmdoc_In, /* In */ - NULL, /* Li */ - pmdoc_Nd, /* Nd */ - pmdoc_Nm, /* Nm */ - NULL, /* Op */ - NULL, /* Ot */ - pmdoc_Pa, /* Pa */ - NULL, /* Rv */ - pmdoc_St, /* St */ - pmdoc_Vt, /* Va */ - pmdoc_Vt, /* Vt */ - pmdoc_Xr, /* Xr */ - NULL, /* %A */ - NULL, /* %B */ - NULL, /* %D */ - NULL, /* %I */ - NULL, /* %J */ - NULL, /* %N */ - NULL, /* %O */ - NULL, /* %P */ - NULL, /* %R */ - NULL, /* %T */ - NULL, /* %V */ - NULL, /* Ac */ - NULL, /* Ao */ - NULL, /* Aq */ - NULL, /* At */ - NULL, /* Bc */ - NULL, /* Bf */ - NULL, /* Bo */ - NULL, /* Bq */ - NULL, /* Bsx */ - NULL, /* Bx */ - NULL, /* Db */ - NULL, /* Dc */ - NULL, /* Do */ - NULL, /* Dq */ - NULL, /* Ec */ - NULL, /* Ef */ - NULL, /* Em */ - NULL, /* Eo */ - NULL, /* Fx */ - NULL, /* Ms */ - NULL, /* No */ - NULL, /* Ns */ - NULL, /* Nx */ - NULL, /* Ox */ - NULL, /* Pc */ - NULL, /* Pf */ - NULL, /* Po */ - NULL, /* Pq */ - NULL, /* Qc */ - NULL, /* Ql */ - NULL, /* Qo */ - NULL, /* Qq */ - NULL, /* Re */ - NULL, /* Rs */ - NULL, /* Sc */ - NULL, /* So */ - NULL, /* Sq */ - NULL, /* Sm */ - NULL, /* Sx */ - NULL, /* Sy */ - NULL, /* Tn */ - NULL, /* Ux */ - NULL, /* Xc */ - NULL, /* Xo */ - pmdoc_Fo, /* Fo */ - NULL, /* Fc */ - NULL, /* Oo */ - NULL, /* Oc */ - NULL, /* Bk */ - NULL, /* Ek */ - NULL, /* Bt */ - NULL, /* Hf */ - NULL, /* Fr */ - NULL, /* Ud */ - NULL, /* Lb */ - NULL, /* Lp */ - NULL, /* Lk */ - NULL, /* Mt */ - NULL, /* Brq */ - NULL, /* Bro */ - NULL, /* Brc */ - NULL, /* %C */ - NULL, /* Es */ - NULL, /* En */ - NULL, /* Dx */ - NULL, /* %Q */ - NULL, /* br */ - NULL, /* sp */ - NULL, /* %U */ - NULL, /* Ta */ +struct mdoc_handler { + int (*fp)(MDOC_ARGS); /* Optional handler. */ + uint64_t mask; /* Set unless handler returns 0. */ + int flags; /* For use by pmdoc_node. */ }; +static const struct mdoc_handler mdocs[MDOC_MAX] = { + { NULL, 0, 0 }, /* Ap */ + { NULL, 0, 0 }, /* Dd */ + { NULL, 0, 0 }, /* Dt */ + { NULL, 0, 0 }, /* Os */ + { pmdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */ + { pmdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */ + { NULL, 0, 0 }, /* Pp */ + { NULL, 0, 0 }, /* D1 */ + { NULL, 0, 0 }, /* Dl */ + { NULL, 0, 0 }, /* Bd */ + { NULL, 0, 0 }, /* Ed */ + { NULL, 0, 0 }, /* Bl */ + { NULL, 0, 0 }, /* El */ + { NULL, 0, 0 }, /* It */ + { NULL, 0, 0 }, /* Ad */ + { NULL, TYPE_An, MDOCF_CHILD }, /* An */ + { NULL, TYPE_Ar, MDOCF_CHILD }, /* Ar */ + { NULL, TYPE_Cd, MDOCF_CHILD }, /* Cd */ + { NULL, TYPE_Cm, MDOCF_CHILD }, /* Cm */ + { NULL, TYPE_Dv, MDOCF_CHILD }, /* Dv */ + { NULL, TYPE_Er, MDOCF_CHILD }, /* Er */ + { NULL, TYPE_Ev, MDOCF_CHILD }, /* Ev */ + { NULL, 0, 0 }, /* Ex */ + { NULL, TYPE_Fa, MDOCF_CHILD }, /* Fa */ + { pmdoc_Fd, TYPE_In, 0 }, /* Fd */ + { NULL, TYPE_Fl, MDOCF_CHILD }, /* Fl */ + { pmdoc_Fn, 0, 0 }, /* Fn */ + { NULL, TYPE_Ft, MDOCF_CHILD }, /* Ft */ + { NULL, TYPE_Ic, MDOCF_CHILD }, /* Ic */ + { pmdoc_In, TYPE_In, 0 }, /* In */ + { NULL, TYPE_Li, MDOCF_CHILD }, /* Li */ + { pmdoc_Nd, TYPE_Nd, MDOCF_CHILD }, /* Nd */ + { pmdoc_Nm, TYPE_Nm, MDOCF_CHILD }, /* Nm */ + { NULL, 0, 0 }, /* Op */ + { NULL, 0, 0 }, /* Ot */ + { NULL, TYPE_Pa, MDOCF_CHILD }, /* Pa */ + { NULL, 0, 0 }, /* Rv */ + { pmdoc_St, TYPE_St, 0 }, /* St */ + { NULL, TYPE_Va, MDOCF_CHILD }, /* Va */ + { pmdoc_body, TYPE_Va, MDOCF_CHILD }, /* Vt */ + { pmdoc_Xr, TYPE_Xr, 0 }, /* Xr */ + { NULL, 0, 0 }, /* %A */ + { NULL, 0, 0 }, /* %B */ + { NULL, 0, 0 }, /* %D */ + { NULL, 0, 0 }, /* %I */ + { NULL, 0, 0 }, /* %J */ + { NULL, 0, 0 }, /* %N */ + { NULL, 0, 0 }, /* %O */ + { NULL, 0, 0 }, /* %P */ + { NULL, 0, 0 }, /* %R */ + { NULL, 0, 0 }, /* %T */ + { NULL, 0, 0 }, /* %V */ + { NULL, 0, 0 }, /* Ac */ + { NULL, 0, 0 }, /* Ao */ + { NULL, 0, 0 }, /* Aq */ + { NULL, TYPE_At, MDOCF_CHILD }, /* At */ + { NULL, 0, 0 }, /* Bc */ + { NULL, 0, 0 }, /* Bf */ + { NULL, 0, 0 }, /* Bo */ + { NULL, 0, 0 }, /* Bq */ + { NULL, TYPE_Bsx, MDOCF_CHILD }, /* Bsx */ + { NULL, TYPE_Bx, MDOCF_CHILD }, /* Bx */ + { NULL, 0, 0 }, /* Db */ + { NULL, 0, 0 }, /* Dc */ + { NULL, 0, 0 }, /* Do */ + { NULL, 0, 0 }, /* Dq */ + { NULL, 0, 0 }, /* Ec */ + { NULL, 0, 0 }, /* Ef */ + { NULL, TYPE_Em, MDOCF_CHILD }, /* Em */ + { NULL, 0, 0 }, /* Eo */ + { NULL, TYPE_Fx, MDOCF_CHILD }, /* Fx */ + { NULL, TYPE_Ms, MDOCF_CHILD }, /* Ms */ + { NULL, 0, 0 }, /* No */ + { NULL, 0, 0 }, /* Ns */ + { NULL, TYPE_Nx, MDOCF_CHILD }, /* Nx */ + { NULL, TYPE_Ox, MDOCF_CHILD }, /* Ox */ + { NULL, 0, 0 }, /* Pc */ + { NULL, 0, 0 }, /* Pf */ + { NULL, 0, 0 }, /* Po */ + { NULL, 0, 0 }, /* Pq */ + { NULL, 0, 0 }, /* Qc */ + { NULL, 0, 0 }, /* Ql */ + { NULL, 0, 0 }, /* Qo */ + { NULL, 0, 0 }, /* Qq */ + { NULL, 0, 0 }, /* Re */ + { NULL, 0, 0 }, /* Rs */ + { NULL, 0, 0 }, /* Sc */ + { NULL, 0, 0 }, /* So */ + { NULL, 0, 0 }, /* Sq */ + { NULL, 0, 0 }, /* Sm */ + { NULL, 0, 0 }, /* Sx */ + { NULL, TYPE_Sy, MDOCF_CHILD }, /* Sy */ + { NULL, TYPE_Tn, MDOCF_CHILD }, /* Tn */ + { NULL, 0, 0 }, /* Ux */ + { NULL, 0, 0 }, /* Xc */ + { NULL, 0, 0 }, /* Xo */ + { pmdoc_head, TYPE_Fn, 0 }, /* Fo */ + { NULL, 0, 0 }, /* Fc */ + { NULL, 0, 0 }, /* Oo */ + { NULL, 0, 0 }, /* Oc */ + { NULL, 0, 0 }, /* Bk */ + { NULL, 0, 0 }, /* Ek */ + { NULL, 0, 0 }, /* Bt */ + { NULL, 0, 0 }, /* Hf */ + { NULL, 0, 0 }, /* Fr */ + { NULL, 0, 0 }, /* Ud */ + { NULL, TYPE_Lb, MDOCF_CHILD }, /* Lb */ + { NULL, 0, 0 }, /* Lp */ + { NULL, TYPE_Lk, MDOCF_CHILD }, /* Lk */ + { NULL, TYPE_Mt, MDOCF_CHILD }, /* Mt */ + { NULL, 0, 0 }, /* Brq */ + { NULL, 0, 0 }, /* Bro */ + { NULL, 0, 0 }, /* Brc */ + { NULL, 0, 0 }, /* %C */ + { NULL, 0, 0 }, /* Es */ + { NULL, 0, 0 }, /* En */ + { NULL, TYPE_Dx, MDOCF_CHILD }, /* Dx */ + { NULL, 0, 0 }, /* %Q */ + { NULL, 0, 0 }, /* br */ + { NULL, 0, 0 }, /* sp */ + { NULL, 0, 0 }, /* %U */ + { NULL, 0, 0 }, /* Ta */ +}; + static const char *progname; static int use_all; /* Use all directories and files. */ static int verb; /* Output verbosity level. */ +static int warnings; /* Potential problems in manuals. */ int main(int argc, char *argv[]) { struct mparse *mp; /* parse sequence */ struct manpaths dirs; + struct mdb mdb; + struct recs recs; enum op op; /* current operation */ const char *dir; - char ibuf[MAXPATHLEN], /* index fname */ - fbuf[MAXPATHLEN]; /* btree fname */ + char *cp; + char pbuf[PATH_MAX]; int ch, i, flags; - DB *idx, /* index database */ - *db, /* keyword database */ - *hash; /* temporary keyword hashtable */ + DB *hash; /* temporary keyword hashtable */ BTREEINFO info; /* btree configuration */ - recno_t maxrec; /* last record number in the index */ - recno_t *recs; /* the numbers of all empty records */ - size_t sz1, sz2, - recsz, /* number of allocated slots in recs */ - reccur; /* current number of empty records */ + size_t sz1, sz2; struct buf buf, /* keyword buffer */ dbuf; /* description buffer */ struct of *of; /* list of files for processing */ @@ -295,43 +311,76 @@ main(int argc, char *argv[]) ++progname; memset(&dirs, 0, sizeof(struct manpaths)); + memset(&mdb, 0, sizeof(struct mdb)); + memset(&recs, 0, sizeof(struct recs)); - verb = 0; - use_all = 0; of = NULL; - db = idx = NULL; mp = NULL; hash = NULL; - recs = NULL; - recsz = reccur = 0; - maxrec = 0; - op = OP_NEW; + op = OP_DEFAULT; dir = NULL; - while (-1 != (ch = getopt(argc, argv, "ad:u:v"))) + while (-1 != (ch = getopt(argc, argv, "aC:d:tu:vW"))) switch (ch) { case ('a'): use_all = 1; break; + case ('C'): + if (op) { + fprintf(stderr, + "-C: conflicting options\n"); + goto usage; + } + dir = optarg; + op = OP_CONFFILE; + break; case ('d'): + if (op) { + fprintf(stderr, + "-d: conflicting options\n"); + goto usage; + } dir = optarg; op = OP_UPDATE; break; + case ('t'): + dup2(STDOUT_FILENO, STDERR_FILENO); + if (op) { + fprintf(stderr, + "-t: conflicting options\n"); + goto usage; + } + op = OP_TEST; + use_all = 1; + warnings = 1; + break; case ('u'): + if (op) { + fprintf(stderr, + "-u: conflicting options\n"); + goto usage; + } dir = optarg; op = OP_DELETE; break; case ('v'): verb++; break; + case ('W'): + warnings = 1; + break; default: - usage(); - return((int)MANDOCLEVEL_BADARG); + goto usage; } argc -= optind; argv += optind; + if (OP_CONFFILE == op && argc > 0) { + fprintf(stderr, "-C: too many arguments\n"); + goto usage; + } + memset(&info, 0, sizeof(BTREEINFO)); info.flags = R_DUP; @@ -345,60 +394,63 @@ main(int argc, char *argv[]) buf.cp = mandoc_malloc(buf.size); dbuf.cp = mandoc_malloc(dbuf.size); - flags = OP_NEW == op ? O_CREAT|O_TRUNC|O_RDWR : O_CREAT|O_RDWR; + flags = O_CREAT | O_RDWR; + if (OP_DEFAULT == op || OP_CONFFILE == op) + flags |= O_TRUNC; + if (OP_TEST == op) { + ofile_argbuild(argc, argv, &of); + if (NULL == of) + goto out; + index_merge(of, mp, &dbuf, &buf, hash, &mdb, &recs); + goto out; + } + if (OP_UPDATE == op || OP_DELETE == op) { - ibuf[0] = fbuf[0] = '\0'; + strlcat(mdb.dbn, dir, MAXPATHLEN); + strlcat(mdb.dbn, "/", MAXPATHLEN); + sz1 = strlcat(mdb.dbn, MANDOC_DB, MAXPATHLEN); - strlcat(fbuf, dir, MAXPATHLEN); - strlcat(fbuf, "/", MAXPATHLEN); - sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN); + strlcat(mdb.idxn, dir, MAXPATHLEN); + strlcat(mdb.idxn, "/", MAXPATHLEN); + sz2 = strlcat(mdb.idxn, MANDOC_IDX, MAXPATHLEN); - strlcat(ibuf, dir, MAXPATHLEN); - strlcat(ibuf, "/", MAXPATHLEN); - sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN); - if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) { - fprintf(stderr, "%s: Path too long\n", dir); + fprintf(stderr, "%s: path too long\n", dir); exit((int)MANDOCLEVEL_BADARG); } - db = dbopen(fbuf, flags, 0644, DB_BTREE, &info); - idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL); + mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info); + mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL); - if (NULL == db) { - perror(fbuf); + if (NULL == mdb.db) { + perror(mdb.dbn); exit((int)MANDOCLEVEL_SYSERR); - } else if (NULL == idx) { - perror(ibuf); + } else if (NULL == mdb.idx) { + perror(mdb.idxn); exit((int)MANDOCLEVEL_SYSERR); } - if (verb > 2) { - printf("%s: Opened\n", fbuf); - printf("%s: Opened\n", ibuf); - } - ofile_argbuild(argc, argv, &of); + if (NULL == of) goto out; - of = of->first; + index_prune(of, &mdb, &recs); - index_prune(of, db, fbuf, idx, ibuf, - &maxrec, &recs, &recsz); - /* - * Go to the root of the respective manual tree - * such that .so links work. In case of failure, - * just prod on, even though .so links won't work. + * Go to the root of the respective manual tree. + * This must work or no manuals may be found (they're + * indexed relative to the root). */ if (OP_UPDATE == op) { - chdir(dir); + if (-1 == chdir(dir)) { + perror(dir); + exit((int)MANDOCLEVEL_SYSERR); + } index_merge(of, mp, &dbuf, &buf, hash, - db, fbuf, idx, ibuf, - maxrec, recs, reccur); + &mdb, &recs); } goto out; @@ -411,79 +463,82 @@ main(int argc, char *argv[]) */ if (argc > 0) { - dirs.paths = mandoc_malloc(argc * sizeof(char *)); + dirs.paths = mandoc_calloc(argc, sizeof(char *)); dirs.sz = argc; - for (i = 0; i < argc; i++) - dirs.paths[i] = mandoc_strdup(argv[i]); + for (i = 0; i < argc; i++) { + if (NULL == (cp = realpath(argv[i], pbuf))) { + perror(argv[i]); + goto out; + } + dirs.paths[i] = mandoc_strdup(cp); + } } else - manpath_parse(&dirs, NULL, NULL); + manpath_parse(&dirs, dir, NULL, NULL); for (i = 0; i < dirs.sz; i++) { - ibuf[0] = fbuf[0] = '\0'; + mdb.idxn[0] = mdb.dbn[0] = '\0'; - strlcat(fbuf, dirs.paths[i], MAXPATHLEN); - strlcat(fbuf, "/", MAXPATHLEN); - sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN); + strlcat(mdb.dbn, dirs.paths[i], MAXPATHLEN); + strlcat(mdb.dbn, "/", MAXPATHLEN); + sz1 = strlcat(mdb.dbn, MANDOC_DB, MAXPATHLEN); - strlcat(ibuf, dirs.paths[i], MAXPATHLEN); - strlcat(ibuf, "/", MAXPATHLEN); - sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN); + strlcat(mdb.idxn, dirs.paths[i], MAXPATHLEN); + strlcat(mdb.idxn, "/", MAXPATHLEN); + sz2 = strlcat(mdb.idxn, MANDOC_IDX, MAXPATHLEN); if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) { - fprintf(stderr, "%s: Path too long\n", + fprintf(stderr, "%s: path too long\n", dirs.paths[i]); exit((int)MANDOCLEVEL_BADARG); } - if (db) - (*db->close)(db); - if (idx) - (*idx->close)(idx); + if (mdb.db) + (*mdb.db->close)(mdb.db); + if (mdb.idx) + (*mdb.idx->close)(mdb.idx); - db = dbopen(fbuf, flags, 0644, DB_BTREE, &info); - idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL); + mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info); + mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL); - if (NULL == db) { - perror(fbuf); + if (NULL == mdb.db) { + perror(mdb.dbn); exit((int)MANDOCLEVEL_SYSERR); - } else if (NULL == idx) { - perror(ibuf); + } else if (NULL == mdb.idx) { + perror(mdb.idxn); exit((int)MANDOCLEVEL_SYSERR); } - if (verb > 2) { - printf("%s: Truncated\n", fbuf); - printf("%s: Truncated\n", ibuf); - } - ofile_free(of); of = NULL; - if ( ! ofile_dirbuild(dirs.paths[i], NULL, NULL, - 0, &of)) + if (-1 == chdir(dirs.paths[i])) { + perror(dirs.paths[i]); exit((int)MANDOCLEVEL_SYSERR); + } + ofile_dirbuild(".", "", "", 0, &of); if (NULL == of) continue; - of = of->first; - /* - * Go to the root of the respective manual tree - * such that .so links work. In case of failure, - * just prod on, even though .so links won't work. + * Go to the root of the respective manual tree. + * This must work or no manuals may be found (they're + * indexed relative to the root). */ - chdir(dirs.paths[i]); - index_merge(of, mp, &dbuf, &buf, hash, db, fbuf, - idx, ibuf, maxrec, recs, reccur); + if (-1 == chdir(dirs.paths[i])) { + perror(dirs.paths[i]); + exit((int)MANDOCLEVEL_SYSERR); + } + + index_merge(of, mp, &dbuf, &buf, hash, &mdb, &recs); } out: - if (db) - (*db->close)(db); - if (idx) - (*idx->close)(idx); + if (mdb.db) + (*mdb.db->close)(mdb.db); + if (mdb.idx) + (*mdb.idx->close)(mdb.idx); if (hash) (*hash->close)(hash); if (mp) @@ -493,54 +548,51 @@ out: ofile_free(of); free(buf.cp); free(dbuf.cp); - free(recs); + free(recs.stack); return(MANDOCLEVEL_OK); + +usage: + fprintf(stderr, + "usage: %s [-avvv] [-C file] | dir ... | -t file ...\n" + " -d dir [file ...] | " + "-u dir [file ...]\n", + progname); + + return((int)MANDOCLEVEL_BADARG); } void index_merge(const struct of *of, struct mparse *mp, struct buf *dbuf, struct buf *buf, DB *hash, - DB *db, const char *dbf, DB *idx, const char *idxf, - recno_t maxrec, const recno_t *recs, size_t reccur) + struct mdb *mdb, struct recs *recs) { recno_t rec; - int ch; + int ch, skip; DBT key, val; struct mdoc *mdoc; struct man *man; - const char *fn, *msec, *mtitle, *arch; + const char *fn, *msec, *march, *mtitle; + uint64_t mask; size_t sv; unsigned seq; - struct db_val vbuf; + uint64_t vbuf[2]; + char type; - for (rec = 0; of; of = of->next) { + rec = 0; + for (of = of->first; of; of = of->next) { fn = of->fname; /* - * Reclaim an empty index record, if available. + * Try interpreting the file as mdoc(7) or man(7) + * source code, unless it is already known to be + * formatted. Fall back to formatted mode. */ - if (reccur > 0) { - --reccur; - rec = recs[(int)reccur]; - } else if (maxrec > 0) { - rec = maxrec; - maxrec = 0; - } else - rec++; - mparse_reset(mp); - hash_reset(&hash); mdoc = NULL; man = NULL; - /* - * Try interpreting the file as mdoc(7) or man(7) - * source code, unless it is already known to be - * formatted. Fall back to formatted mode. - */ - if ((MANDOC_SRC & of->src_form || ! (MANDOC_FORM & of->src_form)) && MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn)) @@ -548,42 +600,65 @@ index_merge(const struct of *of, struct mparse *mp, if (NULL != mdoc) { msec = mdoc_meta(mdoc)->msec; - arch = mdoc_meta(mdoc)->arch; + march = mdoc_meta(mdoc)->arch; + if (NULL == march) + march = ""; mtitle = mdoc_meta(mdoc)->title; } else if (NULL != man) { msec = man_meta(man)->msec; - arch = NULL; + march = ""; mtitle = man_meta(man)->title; } else { msec = of->sec; - arch = of->arch; + march = of->arch; mtitle = of->title; } /* * By default, skip a file if the manual section - * and architecture given in the file disagree - * with the directory where the file is located. + * given in the file disagrees with the directory + * where the file is located. */ - if (0 == use_all) { - assert(of->sec); - assert(msec); - if (strcmp(msec, of->sec)) - continue; - - if (NULL == arch) { - if (NULL != of->arch) - continue; - } else if (NULL == of->arch || - strcmp(arch, of->arch)) - continue; + skip = 0; + assert(of->sec); + assert(msec); + if (strcasecmp(msec, of->sec)) { + if (warnings) + fprintf(stderr, "%s: " + "section \"%s\" manual " + "in \"%s\" directory\n", + fn, msec, of->sec); + skip = 1; } - if (NULL == arch) - arch = ""; + /* + * Manual page directories exist for each kernel + * architecture as returned by machine(1). + * However, many manuals only depend on the + * application architecture as returned by arch(1). + * For example, some (2/ARM) manuals are shared + * across the "armish" and "zaurus" kernel + * architectures. + * A few manuals are even shared across completely + * different architectures, for example fdformat(1) + * on amd64, i386, sparc, and sparc64. + * Thus, warn about architecture mismatches, + * but don't skip manuals for this reason. + */ - /* + assert(of->arch); + assert(march); + if (strcasecmp(march, of->arch)) { + if (warnings) + fprintf(stderr, "%s: " + "architecture \"%s\" manual " + "in \"%s\" directory\n", + fn, march, of->arch); + march = of->arch; + } + + /* * By default, skip a file if the title given * in the file disagrees with the file name. * If both agree, use the file name as the title, @@ -592,13 +667,20 @@ index_merge(const struct of *of, struct mparse *mp, assert(of->title); assert(mtitle); - - if (0 == strcasecmp(mtitle, of->title)) + if (strcasecmp(mtitle, of->title)) { + if (warnings) + fprintf(stderr, "%s: " + "title \"%s\" in file " + "but \"%s\" in filename\n", + fn, mtitle, of->title); + skip = 1; + } else mtitle = of->title; - else if (0 == use_all) + + if (skip && !use_all) continue; - /* + /* * The index record value consists of a nil-terminated * filename, a nil-terminated manual section, and a * nil-terminated description. Since the description @@ -607,16 +689,21 @@ index_merge(const struct of *of, struct mparse *mp, */ dbuf->len = 0; - buf_append(dbuf, mdoc ? "mdoc" : (man ? "man" : "cat")); + type = mdoc ? 'd' : (man ? 'a' : 'c'); + buf_appendb(dbuf, &type, 1); buf_appendb(dbuf, fn, strlen(fn) + 1); buf_appendb(dbuf, msec, strlen(msec) + 1); buf_appendb(dbuf, mtitle, strlen(mtitle) + 1); - buf_appendb(dbuf, arch, strlen(arch) + 1); + buf_appendb(dbuf, march, strlen(march) + 1); sv = dbuf->len; - /* Fix the record number in the btree value. */ + /* + * Collect keyword/mask pairs. + * Each pair will become a new btree node. + */ + hash_reset(&hash); if (mdoc) pmdoc_node(hash, buf, dbuf, mdoc_node(mdoc), mdoc_meta(mdoc)); @@ -625,25 +712,46 @@ index_merge(const struct of *of, struct mparse *mp, else pformatted(hash, buf, dbuf, of); + /* Test mode, do not access any database. */ + + if (NULL == mdb->db || NULL == mdb->idx) + continue; + /* - * Copy from the in-memory hashtable of pending keywords - * into the database. + * Reclaim an empty index record, if available. + * Use its record number for all new btree nodes. */ - vbuf.rec = htobe32(rec); + if (recs->cur > 0) { + recs->cur--; + rec = recs->stack[(int)recs->cur]; + } else if (recs->last > 0) { + rec = recs->last; + recs->last = 0; + } else + rec++; + vbuf[1] = htobe64(rec); + + /* + * Copy from the in-memory hashtable of pending + * keyword/mask pairs into the database. + */ + seq = R_FIRST; while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) { seq = R_NEXT; - vbuf.mask = htobe64(*(uint64_t *)val.data); - val.size = sizeof(struct db_val); + assert(sizeof(uint64_t) == val.size); + memcpy(&mask, val.data, val.size); + vbuf[0] = htobe64(mask); + val.size = sizeof(vbuf); val.data = &vbuf; - dbt_put(db, dbf, &key, &val); + dbt_put(mdb->db, mdb->dbn, &key, &val); } if (ch < 0) { perror("hash"); exit((int)MANDOCLEVEL_SYSERR); } - + /* * Apply to the index. If we haven't had a description * set, put an empty one in now. @@ -659,9 +767,9 @@ index_merge(const struct of *of, struct mparse *mp, val.size = dbuf->len; if (verb) - printf("%s: Added index\n", fn); + printf("%s: adding to index\n", fn); - dbt_put(idx, idxf, &key, &val); + dbt_put(mdb->idx, mdb->idxn, &key, &val); } } @@ -672,24 +780,21 @@ index_merge(const struct of *of, struct mparse *mp, * in `idx' (zeroing its value size). */ static void -index_prune(const struct of *ofile, DB *db, const char *dbf, - DB *idx, const char *idxf, - recno_t *maxrec, recno_t **recs, size_t *recsz) +index_prune(const struct of *ofile, struct mdb *mdb, struct recs *recs) { const struct of *of; - const char *fn, *cp; - struct db_val *vbuf; + const char *fn; + uint64_t vbuf[2]; unsigned seq, sseq; DBT key, val; - size_t reccur; int ch; - reccur = 0; + recs->cur = 0; seq = R_FIRST; - while (0 == (ch = (*idx->seq)(idx, &key, &val, seq))) { + while (0 == (ch = (*mdb->idx->seq)(mdb->idx, &key, &val, seq))) { seq = R_NEXT; - *maxrec = *(recno_t *)key.data; - cp = val.data; + assert(sizeof(recno_t) == key.size); + memcpy(&recs->last, key.data, key.size); /* Deleted records are zero-sized. Skip them. */ @@ -703,19 +808,16 @@ index_prune(const struct of *ofile, DB *db, const char * Failing any of these, we go into our error handler. */ - if (NULL == (fn = memchr(cp, '\0', val.size))) + fn = (char *)val.data + 1; + if (NULL == memchr(fn, '\0', val.size - 1)) break; - if (++fn - cp >= (int)val.size) - break; - if (NULL == memchr(fn, '\0', val.size - (fn - cp))) - break; - /* + /* * Search for the file in those we care about. * XXX: build this into a tree. Too slow. */ - for (of = ofile; of; of = of->next) + for (of = ofile->first; of; of = of->next) if (0 == strcmp(fn, of->fname)) break; @@ -728,55 +830,58 @@ index_prune(const struct of *ofile, DB *db, const char */ sseq = R_FIRST; - while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) { + while (0 == (ch = (*mdb->db->seq)(mdb->db, + &key, &val, sseq))) { sseq = R_NEXT; - if (sizeof(struct db_val) != val.size) + if (sizeof(vbuf) != val.size) break; - vbuf = val.data; - if (*maxrec != betoh32(vbuf->rec)) + memcpy(vbuf, val.data, val.size); + if (recs->last != betoh64(vbuf[1])) continue; - if ((ch = (*db->del)(db, &key, R_CURSOR)) < 0) + if ((ch = (*mdb->db->del)(mdb->db, + &key, R_CURSOR)) < 0) break; } if (ch < 0) { - perror(dbf); + perror(mdb->dbn); exit((int)MANDOCLEVEL_SYSERR); } else if (1 != ch) { - fprintf(stderr, "%s: Corrupt database\n", dbf); + fprintf(stderr, "%s: corrupt database\n", + mdb->dbn); exit((int)MANDOCLEVEL_SYSERR); } if (verb) - printf("%s: Deleted index\n", fn); + printf("%s: deleting from index\n", fn); val.size = 0; - ch = (*idx->put)(idx, &key, &val, R_CURSOR); + ch = (*mdb->idx->put)(mdb->idx, &key, &val, R_CURSOR); if (ch < 0) break; cont: - if (reccur >= *recsz) { - *recsz += MANDOC_SLOP; - *recs = mandoc_realloc - (*recs, *recsz * sizeof(recno_t)); + if (recs->cur >= recs->size) { + recs->size += MANDOC_SLOP; + recs->stack = mandoc_realloc(recs->stack, + recs->size * sizeof(recno_t)); } - (*recs)[(int)reccur] = *maxrec; - reccur++; + recs->stack[(int)recs->cur] = recs->last; + recs->cur++; } if (ch < 0) { - perror(idxf); + perror(mdb->idxn); exit((int)MANDOCLEVEL_SYSERR); } else if (1 != ch) { - fprintf(stderr, "%s: Corrupt index\n", idxf); + fprintf(stderr, "%s: corrupt index\n", mdb->idxn); exit((int)MANDOCLEVEL_SYSERR); } - (*maxrec)++; + recs->last++; } /* @@ -844,19 +949,7 @@ buf_appendmdoc(struct buf *buf, const struct mdoc_node } } -/* ARGSUSED */ static void -pmdoc_An(MDOC_ARGS) -{ - - if (SEC_AUTHORS != n->sec) - return; - - buf_appendmdoc(buf, n->child, 0); - hash_put(hash, buf, TYPE_An); -} - -static void hash_reset(DB **db) { DB *hash; @@ -872,26 +965,42 @@ hash_reset(DB **db) } /* ARGSUSED */ -static void +static int +pmdoc_head(MDOC_ARGS) +{ + + return(MDOC_HEAD == n->type); +} + +/* ARGSUSED */ +static int +pmdoc_body(MDOC_ARGS) +{ + + return(MDOC_BODY == n->type); +} + +/* ARGSUSED */ +static int pmdoc_Fd(MDOC_ARGS) { const char *start, *end; size_t sz; - + if (SEC_SYNOPSIS != n->sec) - return; + return(0); if (NULL == (n = n->child) || MDOC_TEXT != n->type) - return; + return(0); /* * Only consider those `Fd' macro fields that begin with an * "inclusion" token (versus, e.g., #define). */ if (strcmp("#include", n->string)) - return; + return(0); if (NULL == (n = n->next) || MDOC_TEXT != n->type) - return; + return(0); /* * Strip away the enclosing angle brackets and make sure we're @@ -903,7 +1012,7 @@ pmdoc_Fd(MDOC_ARGS) start++; if (0 == (sz = strlen(start))) - return; + return(0); end = &start[(int)sz - 1]; if ('>' == *end || '"' == *end) @@ -913,83 +1022,90 @@ pmdoc_Fd(MDOC_ARGS) buf_appendb(buf, start, (size_t)(end - start + 1)); buf_appendb(buf, "", 1); - - hash_put(hash, buf, TYPE_In); + return(1); } /* ARGSUSED */ -static void -pmdoc_Cd(MDOC_ARGS) -{ - - if (SEC_SYNOPSIS != n->sec) - return; - - buf_appendmdoc(buf, n->child, 0); - hash_put(hash, buf, TYPE_Cd); -} - -/* ARGSUSED */ -static void +static int pmdoc_In(MDOC_ARGS) { - - if (SEC_SYNOPSIS != n->sec) - return; + if (NULL == n->child || MDOC_TEXT != n->child->type) - return; + return(0); buf_append(buf, n->child->string); - hash_put(hash, buf, TYPE_In); + return(1); } /* ARGSUSED */ -static void +static int pmdoc_Fn(MDOC_ARGS) { + struct mdoc_node *nn; const char *cp; - - if (SEC_SYNOPSIS != n->sec) - return; - if (NULL == n->child || MDOC_TEXT != n->child->type) - return; - /* .Fn "struct type *arg" "foo" */ + nn = n->child; - cp = strrchr(n->child->string, ' '); + if (NULL == nn || MDOC_TEXT != nn->type) + return(0); + + /* .Fn "struct type *name" "char *arg" */ + + cp = strrchr(nn->string, ' '); if (NULL == cp) - cp = n->child->string; + cp = nn->string; /* Strip away pointer symbol. */ while ('*' == *cp) cp++; + /* Store the function name. */ + buf_append(buf, cp); hash_put(hash, buf, TYPE_Fn); + + /* Store the function type. */ + + if (nn->string < cp) { + buf->len = 0; + buf_appendb(buf, nn->string, cp - nn->string); + buf_appendb(buf, "", 1); + hash_put(hash, buf, TYPE_Ft); + } + + /* Store the arguments. */ + + for (nn = nn->next; nn; nn = nn->next) { + if (MDOC_TEXT != nn->type) + continue; + buf->len = 0; + buf_append(buf, nn->string); + hash_put(hash, buf, TYPE_Fa); + } + + return(0); } /* ARGSUSED */ -static void +static int pmdoc_St(MDOC_ARGS) { - - if (SEC_STANDARDS != n->sec) - return; + if (NULL == n->child || MDOC_TEXT != n->child->type) - return; + return(0); buf_append(buf, n->child->string); - hash_put(hash, buf, TYPE_St); + return(1); } /* ARGSUSED */ -static void +static int pmdoc_Xr(MDOC_ARGS) { if (NULL == (n = n->child)) - return; + return(0); buf_appendb(buf, n->string, strlen(n->string)); @@ -999,133 +1115,49 @@ pmdoc_Xr(MDOC_ARGS) } else buf_appendb(buf, ".", 2); - hash_put(hash, buf, TYPE_Xr); + return(1); } /* ARGSUSED */ -static void -pmdoc_Vt(MDOC_ARGS) -{ - const char *start; - size_t sz; - - if (SEC_SYNOPSIS != n->sec) - return; - if (MDOC_Vt == n->tok && MDOC_BODY != n->type) - return; - if (NULL == n->last || MDOC_TEXT != n->last->type) - return; - - /* - * Strip away leading pointer symbol '*' and trailing ';'. - */ - - start = n->last->string; - - while ('*' == *start) - start++; - - if (0 == (sz = strlen(start))) - return; - - if (';' == start[(int)sz - 1]) - sz--; - - if (0 == sz) - return; - - buf_appendb(buf, start, sz); - buf_appendb(buf, "", 1); - hash_put(hash, buf, TYPE_Va); -} - -/* ARGSUSED */ -static void -pmdoc_Fo(MDOC_ARGS) -{ - - if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) - return; - if (NULL == n->child || MDOC_TEXT != n->child->type) - return; - - buf_append(buf, n->child->string); - hash_put(hash, buf, TYPE_Fn); -} - - -/* ARGSUSED */ -static void +static int pmdoc_Nd(MDOC_ARGS) { if (MDOC_BODY != n->type) - return; + return(0); buf_appendmdoc(dbuf, n->child, 1); - buf_appendmdoc(buf, n->child, 0); - - hash_put(hash, buf, TYPE_Nd); + return(1); } /* ARGSUSED */ -static void -pmdoc_Er(MDOC_ARGS) +static int +pmdoc_Nm(MDOC_ARGS) { - if (SEC_ERRORS != n->sec) - return; - - buf_appendmdoc(buf, n->child, 0); - hash_put(hash, buf, TYPE_Er); -} + if (SEC_NAME == n->sec) + return(1); + else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) + return(0); -/* ARGSUSED */ -static void -pmdoc_Ev(MDOC_ARGS) -{ + if (NULL == n->child) + buf_append(buf, m->name); - if (SEC_ENVIRONMENT != n->sec) - return; - - buf_appendmdoc(buf, n->child, 0); - hash_put(hash, buf, TYPE_Ev); + return(1); } /* ARGSUSED */ -static void -pmdoc_Pa(MDOC_ARGS) +static int +pmdoc_Sh(MDOC_ARGS) { - if (SEC_FILES != n->sec) - return; - - buf_appendmdoc(buf, n->child, 0); - hash_put(hash, buf, TYPE_Pa); + return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type); } -/* ARGSUSED */ static void -pmdoc_Nm(MDOC_ARGS) -{ - - if (SEC_NAME == n->sec) { - buf_appendmdoc(buf, n->child, 0); - hash_put(hash, buf, TYPE_Nm); - return; - } else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) - return; - - if (NULL == n->child) - buf_append(buf, m->name); - - buf_appendmdoc(buf, n->child, 0); - hash_put(hash, buf, TYPE_Nm); -} - -static void hash_put(DB *db, const struct buf *buf, uint64_t mask) { + uint64_t oldmask; DBT key, val; int rc; @@ -1138,8 +1170,11 @@ hash_put(DB *db, const struct buf *buf, uint64_t mask) if ((rc = (*db->get)(db, &key, &val, 0)) < 0) { perror("hash"); exit((int)MANDOCLEVEL_SYSERR); - } else if (0 == rc) - mask |= *(uint64_t *)val.data; + } else if (0 == rc) { + assert(sizeof(uint64_t) == val.size); + memcpy(&oldmask, val.data, val.size); + mask |= oldmask; + } val.data = &mask; val.size = sizeof(uint64_t); @@ -1186,11 +1221,36 @@ pmdoc_node(MDOC_ARGS) case (MDOC_BLOCK): /* FALLTHROUGH */ case (MDOC_ELEM): - if (NULL == mdocs[n->tok]) + buf->len = 0; + + /* + * Both NULL handlers and handlers returning true + * request using the data. Only skip the element + * when the handler returns false. + */ + + if (NULL != mdocs[n->tok].fp && + 0 == (*mdocs[n->tok].fp)(hash, buf, dbuf, n, m)) break; - buf->len = 0; - (*mdocs[n->tok])(hash, buf, dbuf, n, m); + /* + * For many macros, use the text from all children. + * Set zero flags for macros not needing this. + * In that case, the handler must fill the buffer. + */ + + if (MDOCF_CHILD & mdocs[n->tok].flags) + buf_appendmdoc(buf, n->child, 0); + + /* + * Cover the most common case: + * Automatically stage one string per element. + * Set a zero mask for macros not needing this. + * Additional staging can be done in the handler. + */ + + if (mdocs[n->tok].mask) + hash_put(hash, buf, mdocs[n->tok].mask); break; default: break; @@ -1275,6 +1335,8 @@ pman_node(MAN_ARGS) if (0 == strncmp(start, "-", 1)) start += 1; + else if (0 == strncmp(start, "\\-\\-", 4)) + start += 4; else if (0 == strncmp(start, "\\-", 2)) start += 2; else if (0 == strncmp(start, "\\(en", 4)) @@ -1305,15 +1367,16 @@ pman_node(MAN_ARGS) * By necessity, this involves rather crude guesswork. */ static void -pformatted(DB *hash, struct buf *buf, struct buf *dbuf, - const struct of *of) +pformatted(DB *hash, struct buf *buf, + struct buf *dbuf, const struct of *of) { FILE *stream; - char *line, *p; - size_t len, plen; + char *line, *p, *title; + size_t len, plen, titlesz; if (NULL == (stream = fopen(of->fname, "r"))) { - perror(of->fname); + if (warnings) + perror(of->fname); return; } @@ -1328,59 +1391,112 @@ pformatted(DB *hash, struct buf *buf, struct buf *dbuf buf_append(buf, of->title); hash_put(hash, buf, TYPE_Nm); - while (NULL != (line = fgetln(stream, &len)) && '\n' != *line) - /* Skip to first blank line. */ ; + /* Skip to first blank line. */ - while (NULL != (line = fgetln(stream, &len)) && - ('\n' == *line || ' ' == *line)) - /* Skip to first section header. */ ; + while (NULL != (line = fgetln(stream, &len))) + if ('\n' == *line) + break; /* - * If no page content can be found, - * reuse the page title as the page description. + * Assume the first line that is not indented + * is the first section header. Skip to it. */ - if (NULL == (line = fgetln(stream, &len))) { + while (NULL != (line = fgetln(stream, &len))) + if ('\n' != *line && ' ' != *line) + break; + + /* + * Read up until the next section into a buffer. + * Strip the leading and trailing newline from each read line, + * appending a trailing space. + * Ignore empty (whitespace-only) lines. + */ + + titlesz = 0; + title = NULL; + + while (NULL != (line = fgetln(stream, &len))) { + if (' ' != *line || '\n' != line[(int)len - 1]) + break; + while (len > 0 && isspace((unsigned char)*line)) { + line++; + len--; + } + if (1 == len) + continue; + title = mandoc_realloc(title, titlesz + len); + memcpy(title + titlesz, line, len); + titlesz += len; + title[(int)titlesz - 1] = ' '; + } + + + /* + * If no page content can be found, or the input line + * is already the next section header, or there is no + * trailing newline, reuse the page title as the page + * description. + */ + + if (NULL == title || '\0' == *title) { + if (warnings) + fprintf(stderr, "%s: cannot find NAME section\n", + of->fname); buf_appendb(dbuf, buf->cp, buf->size); hash_put(hash, buf, TYPE_Nd); fclose(stream); + free(title); return; } - fclose(stream); + title = mandoc_realloc(title, titlesz + 1); + title[(int)titlesz] = '\0'; + /* - * If there is a dash, skip to the text following it. + * Skip to the first dash. + * Use the remaining line as the description (no more than 70 + * bytes). */ - for (p = line, plen = len; plen; p++, plen--) - if ('-' == *p) - break; - for ( ; plen; p++, plen--) - if ('-' != *p && ' ' != *p && 8 != *p) - break; - if (0 == plen) { - p = line; - plen = len; + if (NULL != (p = strstr(title, "- "))) { + for (p += 2; ' ' == *p || '\b' == *p; p++) + /* Skip to next word. */ ; + } else { + if (warnings) + fprintf(stderr, "%s: no dash in title line\n", + of->fname); + p = title; } - /* - * Copy the rest of the line, but no more than 70 bytes. - */ + plen = strlen(p); - if (70 < plen) - plen = 70; - p[plen-1] = '\0'; - buf_appendb(dbuf, p, plen); + /* Strip backspace-encoding from line. */ + + while (NULL != (line = memchr(p, '\b', plen))) { + len = line - p; + if (0 == len) { + memmove(line, line + 1, plen--); + continue; + } + memmove(line - 1, line + 1, plen - len); + plen -= 2; + } + + buf_appendb(dbuf, p, plen + 1); buf->len = 0; - buf_appendb(buf, p, plen); + buf_appendb(buf, p, plen + 1); hash_put(hash, buf, TYPE_Nd); + fclose(stream); + free(title); } static void ofile_argbuild(int argc, char *argv[], struct of **of) { char buf[MAXPATHLEN]; - char *sec, *arch, *title, *p; + const char *sec, *arch, *title; + char *p; int i, src_form; struct of *nof; @@ -1394,14 +1510,14 @@ ofile_argbuild(int argc, char *argv[], struct of **of) */ if (strlcpy(buf, argv[i], sizeof(buf)) >= sizeof(buf)) { - fprintf(stderr, "%s: Path too long\n", argv[i]); + fprintf(stderr, "%s: path too long\n", argv[i]); continue; } - sec = arch = title = NULL; + sec = arch = title = ""; src_form = 0; p = strrchr(buf, '\0'); while (p-- > buf) { - if (NULL == sec && '.' == *p) { + if ('\0' == *sec && '.' == *p) { sec = p + 1; *p = '\0'; if ('0' == *sec) @@ -1412,22 +1528,27 @@ ofile_argbuild(int argc, char *argv[], struct of **of) } if ('/' != *p) continue; - if (NULL == title) { + if ('\0' == *title) { title = p + 1; *p = '\0'; continue; } - if (strncmp("man", p + 1, 3)) { + if (0 == strncmp("man", p + 1, 3)) src_form |= MANDOC_SRC; - arch = p + 1; - } else if (strncmp("cat", p + 1, 3)) { + else if (0 == strncmp("cat", p + 1, 3)) src_form |= MANDOC_FORM; + else arch = p + 1; - } break; } - if (NULL == title) + if ('\0' == *title) { + if (warnings) + fprintf(stderr, + "%s: cannot deduce title " + "from filename\n", + argv[i]); title = buf; + } /* * Build the file structure. @@ -1435,10 +1556,8 @@ ofile_argbuild(int argc, char *argv[], struct of **of) nof = mandoc_calloc(1, sizeof(struct of)); nof->fname = mandoc_strdup(argv[i]); - if (NULL != sec) - nof->sec = mandoc_strdup(sec); - if (NULL != arch) - nof->arch = mandoc_strdup(arch); + nof->sec = mandoc_strdup(sec); + nof->arch = mandoc_strdup(arch); nof->title = mandoc_strdup(title); nof->src_form = src_form; @@ -1446,8 +1565,8 @@ ofile_argbuild(int argc, char *argv[], struct of **of) * Add the structure to the list. */ - if (verb > 2) - printf("%s: Scheduling\n", argv[i]); + if (verb > 1) + printf("%s: scheduling\n", argv[i]); if (NULL == *of) { *of = nof; (*of)->first = nof; @@ -1463,16 +1582,15 @@ ofile_argbuild(int argc, char *argv[], struct of **of) * Recursively build up a list of files to parse. * We use this instead of ftw() and so on because I don't want global * variables hanging around. - * This ignores the mandoc.db and mandoc.index files, but assumes that + * This ignores the whatis.db and whatis.index files, but assumes that * everything else is a manual. * Pass in a pointer to a NULL structure for the first invocation. */ -static int +static void ofile_dirbuild(const char *dir, const char* psec, const char *parch, int p_src_form, struct of **of) { char buf[MAXPATHLEN]; - struct stat sb; size_t sz; DIR *d; const char *fn, *sec, *arch; @@ -1482,8 +1600,9 @@ ofile_dirbuild(const char *dir, const char* psec, cons int src_form; if (NULL == (d = opendir(dir))) { - perror(dir); - return(0); + if (warnings) + perror(dir); + return; } while (NULL != (dp = readdir(d))) { @@ -1504,22 +1623,37 @@ ofile_dirbuild(const char *dir, const char* psec, cons * cat
/[/] */ - if (NULL == sec) { + if ('\0' == *sec) { if(0 == strncmp("man", fn, 3)) { src_form |= MANDOC_SRC; sec = fn + 3; } else if (0 == strncmp("cat", fn, 3)) { src_form |= MANDOC_FORM; sec = fn + 3; - } else if (use_all) - sec = fn; - else - continue; - } else if (NULL == arch && (use_all || - NULL == strchr(fn, '.'))) + } else { + if (warnings) fprintf(stderr, + "%s/%s: bad section\n", + dir, fn); + if (use_all) + sec = fn; + else + continue; + } + } else if ('\0' == *arch) { + if (NULL != strchr(fn, '.')) { + if (warnings) fprintf(stderr, + "%s/%s: bad architecture\n", + dir, fn); + if (0 == use_all) + continue; + } arch = fn; - else if (0 == use_all) - continue; + } else { + if (warnings) fprintf(stderr, "%s/%s: " + "excessive subdirectory\n", dir, fn); + if (0 == use_all) + continue; + } buf[0] = '\0'; strlcat(buf, dir, MAXPATHLEN); @@ -1527,22 +1661,35 @@ ofile_dirbuild(const char *dir, const char* psec, cons sz = strlcat(buf, fn, MAXPATHLEN); if (MAXPATHLEN <= sz) { - fprintf(stderr, "%s: Path too long\n", dir); - return(0); + if (warnings) fprintf(stderr, "%s/%s: " + "path too long\n", dir, fn); + continue; } - - if (verb > 2) - printf("%s: Scanning\n", buf); - if ( ! ofile_dirbuild(buf, sec, arch, - src_form, of)) - return(0); + if (verb > 1) + printf("%s: scanning\n", buf); + + ofile_dirbuild(buf, sec, arch, src_form, of); + continue; } - if (DT_REG != dp->d_type || - (NULL == psec && !use_all) || - !strcmp(MANDOC_DB, fn) || - !strcmp(MANDOC_IDX, fn)) + + if (DT_REG != dp->d_type) { + if (warnings) + fprintf(stderr, + "%s/%s: not a regular file\n", + dir, fn); continue; + } + if (!strcmp(MANDOC_DB, fn) || !strcmp(MANDOC_IDX, fn)) + continue; + if ('\0' == *psec) { + if (warnings) + fprintf(stderr, + "%s/%s: file outside section\n", + dir, fn); + if (0 == use_all) + continue; + } /* * By default, skip files where the file name suffix @@ -1551,23 +1698,29 @@ ofile_dirbuild(const char *dir, const char* psec, cons */ suffix = strrchr(fn, '.'); - if (0 == use_all) { - if (NULL == suffix) + if (NULL == suffix) { + if (warnings) + fprintf(stderr, + "%s/%s: no filename suffix\n", + dir, fn); + if (0 == use_all) continue; - if ((MANDOC_SRC & src_form && - strcmp(suffix + 1, psec)) || + } else if ((MANDOC_SRC & src_form && + strcmp(suffix + 1, psec)) || (MANDOC_FORM & src_form && - strcmp(suffix + 1, "0"))) - continue; - } - if (NULL != suffix) { + strcmp(suffix + 1, "0"))) { + if (warnings) + fprintf(stderr, + "%s/%s: wrong filename suffix\n", + dir, fn); + if (0 == use_all) + continue; if ('0' == suffix[1]) src_form |= MANDOC_FORM; else if ('1' <= suffix[1] && '9' >= suffix[1]) src_form |= MANDOC_SRC; } - /* * Skip formatted manuals if a source version is * available. Ignore the age: it is very unlikely @@ -1576,10 +1729,14 @@ ofile_dirbuild(const char *dir, const char* psec, cons * and in ports, old manuals get removed on update. */ if (0 == use_all && MANDOC_FORM & src_form && - NULL != psec) { + '\0' != *psec) { buf[0] = '\0'; strlcat(buf, dir, MAXPATHLEN); p = strrchr(buf, '/'); + if ('\0' != *parch && NULL != p) + for (p--; p > buf; p--) + if ('/' == *p) + break; if (NULL == p) p = buf; else @@ -1589,7 +1746,9 @@ ofile_dirbuild(const char *dir, const char* psec, cons strlcat(buf, "/", MAXPATHLEN); sz = strlcat(buf, fn, MAXPATHLEN); if (sz >= MAXPATHLEN) { - fprintf(stderr, "%s: Path too long\n", buf); + if (warnings) fprintf(stderr, + "%s/%s: path too long\n", + dir, fn); continue; } q = strrchr(buf, '.'); @@ -1597,30 +1756,33 @@ ofile_dirbuild(const char *dir, const char* psec, cons *q = '\0'; sz = strlcat(buf, psec, MAXPATHLEN); if (sz >= MAXPATHLEN) { - fprintf(stderr, - "%s: Path too long\n", buf); + if (warnings) fprintf(stderr, + "%s/%s: path too long\n", + dir, fn); continue; } - if (0 == stat(buf, &sb)) + if (0 == access(buf, R_OK)) continue; } } buf[0] = '\0'; - strlcat(buf, dir, MAXPATHLEN); - strlcat(buf, "/", MAXPATHLEN); + assert('.' == dir[0]); + if ('/' == dir[1]) { + strlcat(buf, dir + 2, MAXPATHLEN); + strlcat(buf, "/", MAXPATHLEN); + } sz = strlcat(buf, fn, MAXPATHLEN); if (sz >= MAXPATHLEN) { - fprintf(stderr, "%s: Path too long\n", dir); + if (warnings) fprintf(stderr, + "%s/%s: path too long\n", dir, fn); continue; } nof = mandoc_calloc(1, sizeof(struct of)); nof->fname = mandoc_strdup(buf); - if (NULL != psec) - nof->sec = mandoc_strdup(psec); - if (NULL != parch) - nof->arch = mandoc_strdup(parch); + nof->sec = mandoc_strdup(psec); + nof->arch = mandoc_strdup(parch); nof->src_form = src_form; /* @@ -1636,8 +1798,9 @@ ofile_dirbuild(const char *dir, const char* psec, cons * Add the structure to the list. */ - if (verb > 2) - printf("%s: Scheduling\n", buf); + if (verb > 1) + printf("%s: scheduling\n", buf); + if (NULL == *of) { *of = nof; (*of)->first = nof; @@ -1649,7 +1812,6 @@ ofile_dirbuild(const char *dir, const char* psec, cons } closedir(d); - return(1); } static void @@ -1657,7 +1819,10 @@ ofile_free(struct of *of) { struct of *nof; - while (of) { + if (NULL != of) + of = of->first; + + while (NULL != of) { nof = of->next; free(of->fname); free(of->sec); @@ -1666,14 +1831,4 @@ ofile_free(struct of *of) free(of); of = nof; } -} - -static void -usage(void) -{ - - fprintf(stderr, "usage: %s [-v] " - "[-d dir [files...] |" - " -u dir [files...] |" - " dir...]\n", progname); }