Annotation of mandoc/mandocdb.c, Revision 1.38
1.38 ! schwarze 1: /* $Id: mandocdb.c,v 1.37 2011/12/20 21:41:11 schwarze Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.12 schwarze 4: * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
1.1 kristaps 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18: #ifdef HAVE_CONFIG_H
19: #include "config.h"
20: #endif
21:
22: #include <sys/param.h>
1.14 schwarze 23: #include <sys/types.h>
1.1 kristaps 24:
25: #include <assert.h>
1.4 kristaps 26: #include <dirent.h>
1.1 kristaps 27: #include <fcntl.h>
28: #include <getopt.h>
29: #include <stdio.h>
30: #include <stdint.h>
31: #include <stdlib.h>
32: #include <string.h>
1.17 schwarze 33: #include <unistd.h>
1.1 kristaps 34:
1.21 kristaps 35: #if defined(__linux__)
36: # include <endian.h>
1.1 kristaps 37: # include <db_185.h>
1.21 kristaps 38: #elif defined(__APPLE__)
39: # include <libkern/OSByteOrder.h>
40: # include <db.h>
1.1 kristaps 41: #else
42: # include <db.h>
43: #endif
44:
45: #include "man.h"
46: #include "mdoc.h"
47: #include "mandoc.h"
1.8 schwarze 48: #include "mandocdb.h"
1.10 kristaps 49: #include "manpath.h"
1.1 kristaps 50:
/* Initial size of, and extra room added when growing, a struct buf. */
#define	MANDOC_BUFSZ	BUFSIZ

/* Number of slots by which the stack of unused index records grows. */
#define	MANDOC_SLOP	1024

/*
 * Bits tested in the src_form member of struct of.
 * NOTE(review): presumably "known to be source" and "known to be
 * formatted"; the code that sets them is outside this view.
 */
#define	MANDOC_SRC	0x1
#define	MANDOC_FORM	0x2
56:
1.38 ! schwarze 57: /* Access to the mandoc database on disk. */
! 58:
! 59: struct mdb {
! 60: char idxn[MAXPATHLEN]; /* index db filename */
! 61: char dbn[MAXPATHLEN]; /* keyword db filename */
! 62: DB *idx; /* index recno database */
! 63: DB *db; /* keyword btree database */
! 64: };
! 65:
! 66: /* Stack of temporarily unused index records. */
! 67:
! 68: struct recs {
! 69: recno_t *stack; /* pointer to a malloc'ed array */
! 70: size_t size; /* number of allocated slots */
! 71: size_t cur; /* current number of empty records */
! 72: recno_t last; /* last record number in the index */
! 73: };
! 74:
/*
 * One manual file scheduled for processing.
 * The files form a simple singly linked list; QUEUE is not needed.
 */

struct	of {
	char		 *fname; /* file name, heap-allocated */
	char		 *sec; /* manual section */
	char		 *arch; /* machine architecture */
	char		 *title; /* manual title */
	int		  src_form; /* MANDOC_SRC | MANDOC_FORM bits */
	struct of	 *next; /* following entry, NULL at the end */
	struct of	 *first; /* head of the list */
};
86:
/* A byte buffer that grows on demand. */

struct	buf {
	char		 *cp; /* the storage itself */
	size_t		  len; /* bytes currently in use */
	size_t		  size; /* bytes allocated */
};
94:
/* The operating mode selected on the command line. */

enum	op {
	OP_DEFAULT = 0, /* build new dbs from dir list or default config */
	OP_CONFFILE, /* build new dbs from a custom config file */
	OP_UPDATE, /* remove and re-add entries in an existing db */
	OP_DELETE, /* remove entries from an existing db */
	OP_TEST /* touch no database, only report potential problems */
};
104:
/*
 * Parameter lists shared by all man(7) and mdoc(7) node handlers:
 * the keyword hashtable, the keyword buffer, the description buffer,
 * the node to inspect and, for mdoc(7), the document meta data.
 */
#define	MAN_ARGS	  DB *hash, \
			  struct buf *buf, \
			  struct buf *dbuf, \
			  const struct man_node *n
#define	MDOC_ARGS	  DB *hash, \
			  struct buf *buf, \
			  struct buf *dbuf, \
			  const struct mdoc_node *n, \
			  const struct mdoc_meta *m
114:
115: static void buf_appendmdoc(struct buf *,
116: const struct mdoc_node *, int);
117: static void buf_append(struct buf *, const char *);
118: static void buf_appendb(struct buf *,
119: const void *, size_t);
120: static void dbt_put(DB *, const char *, DBT *, DBT *);
1.9 kristaps 121: static void hash_put(DB *, const struct buf *, uint64_t);
1.1 kristaps 122: static void hash_reset(DB **);
1.3 kristaps 123: static void index_merge(const struct of *, struct mparse *,
1.16 schwarze 124: struct buf *, struct buf *, DB *,
1.38 ! schwarze 125: struct mdb *, struct recs *);
! 126: static void index_prune(const struct of *, struct mdb *,
! 127: struct recs *);
1.16 schwarze 128: static void ofile_argbuild(int, char *[], struct of **);
1.35 kristaps 129: static void ofile_dirbuild(const char *, const char *,
1.16 schwarze 130: const char *, int, struct of **);
1.4 kristaps 131: static void ofile_free(struct of *);
1.14 schwarze 132: static void pformatted(DB *, struct buf *, struct buf *,
133: const struct of *);
1.1 kristaps 134: static int pman_node(MAN_ARGS);
135: static void pmdoc_node(MDOC_ARGS);
1.25 schwarze 136: static int pmdoc_head(MDOC_ARGS);
137: static int pmdoc_body(MDOC_ARGS);
138: static int pmdoc_Fd(MDOC_ARGS);
139: static int pmdoc_In(MDOC_ARGS);
140: static int pmdoc_Fn(MDOC_ARGS);
141: static int pmdoc_Nd(MDOC_ARGS);
142: static int pmdoc_Nm(MDOC_ARGS);
143: static int pmdoc_Sh(MDOC_ARGS);
144: static int pmdoc_St(MDOC_ARGS);
145: static int pmdoc_Xr(MDOC_ARGS);
1.1 kristaps 146:
/* Handler flag: index the child nodes automatically. */
#define	MDOCF_CHILD	  0x01

/* Describes how one mdoc(7) macro contributes keywords. */
struct	mdoc_handler {
	int		(*fp)(MDOC_ARGS); /* optional handler function */
	uint64_t	  mask; /* set unless the handler returns 0 */
	int		  flags; /* MDOCF_* bits, for pmdoc_node */
};
154:
155: static const struct mdoc_handler mdocs[MDOC_MAX] = {
156: { NULL, 0, 0 }, /* Ap */
157: { NULL, 0, 0 }, /* Dd */
158: { NULL, 0, 0 }, /* Dt */
159: { NULL, 0, 0 }, /* Os */
160: { pmdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */
161: { pmdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */
162: { NULL, 0, 0 }, /* Pp */
163: { NULL, 0, 0 }, /* D1 */
164: { NULL, 0, 0 }, /* Dl */
165: { NULL, 0, 0 }, /* Bd */
166: { NULL, 0, 0 }, /* Ed */
167: { NULL, 0, 0 }, /* Bl */
168: { NULL, 0, 0 }, /* El */
169: { NULL, 0, 0 }, /* It */
170: { NULL, 0, 0 }, /* Ad */
171: { NULL, TYPE_An, MDOCF_CHILD }, /* An */
172: { NULL, TYPE_Ar, MDOCF_CHILD }, /* Ar */
173: { NULL, TYPE_Cd, MDOCF_CHILD }, /* Cd */
174: { NULL, TYPE_Cm, MDOCF_CHILD }, /* Cm */
175: { NULL, TYPE_Dv, MDOCF_CHILD }, /* Dv */
176: { NULL, TYPE_Er, MDOCF_CHILD }, /* Er */
177: { NULL, TYPE_Ev, MDOCF_CHILD }, /* Ev */
178: { NULL, 0, 0 }, /* Ex */
179: { NULL, TYPE_Fa, MDOCF_CHILD }, /* Fa */
180: { pmdoc_Fd, TYPE_In, 0 }, /* Fd */
181: { NULL, TYPE_Fl, MDOCF_CHILD }, /* Fl */
182: { pmdoc_Fn, 0, 0 }, /* Fn */
183: { NULL, TYPE_Ft, MDOCF_CHILD }, /* Ft */
184: { NULL, TYPE_Ic, MDOCF_CHILD }, /* Ic */
185: { pmdoc_In, TYPE_In, 0 }, /* In */
186: { NULL, TYPE_Li, MDOCF_CHILD }, /* Li */
187: { pmdoc_Nd, TYPE_Nd, MDOCF_CHILD }, /* Nd */
188: { pmdoc_Nm, TYPE_Nm, MDOCF_CHILD }, /* Nm */
189: { NULL, 0, 0 }, /* Op */
190: { NULL, 0, 0 }, /* Ot */
191: { NULL, TYPE_Pa, MDOCF_CHILD }, /* Pa */
192: { NULL, 0, 0 }, /* Rv */
193: { pmdoc_St, TYPE_St, 0 }, /* St */
194: { NULL, TYPE_Va, MDOCF_CHILD }, /* Va */
195: { pmdoc_body, TYPE_Va, MDOCF_CHILD }, /* Vt */
196: { pmdoc_Xr, TYPE_Xr, 0 }, /* Xr */
197: { NULL, 0, 0 }, /* %A */
198: { NULL, 0, 0 }, /* %B */
199: { NULL, 0, 0 }, /* %D */
200: { NULL, 0, 0 }, /* %I */
201: { NULL, 0, 0 }, /* %J */
202: { NULL, 0, 0 }, /* %N */
203: { NULL, 0, 0 }, /* %O */
204: { NULL, 0, 0 }, /* %P */
205: { NULL, 0, 0 }, /* %R */
206: { NULL, 0, 0 }, /* %T */
207: { NULL, 0, 0 }, /* %V */
208: { NULL, 0, 0 }, /* Ac */
209: { NULL, 0, 0 }, /* Ao */
210: { NULL, 0, 0 }, /* Aq */
211: { NULL, TYPE_At, MDOCF_CHILD }, /* At */
212: { NULL, 0, 0 }, /* Bc */
213: { NULL, 0, 0 }, /* Bf */
214: { NULL, 0, 0 }, /* Bo */
215: { NULL, 0, 0 }, /* Bq */
216: { NULL, TYPE_Bsx, MDOCF_CHILD }, /* Bsx */
217: { NULL, TYPE_Bx, MDOCF_CHILD }, /* Bx */
218: { NULL, 0, 0 }, /* Db */
219: { NULL, 0, 0 }, /* Dc */
220: { NULL, 0, 0 }, /* Do */
221: { NULL, 0, 0 }, /* Dq */
222: { NULL, 0, 0 }, /* Ec */
223: { NULL, 0, 0 }, /* Ef */
224: { NULL, TYPE_Em, MDOCF_CHILD }, /* Em */
225: { NULL, 0, 0 }, /* Eo */
226: { NULL, TYPE_Fx, MDOCF_CHILD }, /* Fx */
227: { NULL, TYPE_Ms, MDOCF_CHILD }, /* Ms */
228: { NULL, 0, 0 }, /* No */
229: { NULL, 0, 0 }, /* Ns */
230: { NULL, TYPE_Nx, MDOCF_CHILD }, /* Nx */
231: { NULL, TYPE_Ox, MDOCF_CHILD }, /* Ox */
232: { NULL, 0, 0 }, /* Pc */
233: { NULL, 0, 0 }, /* Pf */
234: { NULL, 0, 0 }, /* Po */
235: { NULL, 0, 0 }, /* Pq */
236: { NULL, 0, 0 }, /* Qc */
237: { NULL, 0, 0 }, /* Ql */
238: { NULL, 0, 0 }, /* Qo */
239: { NULL, 0, 0 }, /* Qq */
240: { NULL, 0, 0 }, /* Re */
241: { NULL, 0, 0 }, /* Rs */
242: { NULL, 0, 0 }, /* Sc */
243: { NULL, 0, 0 }, /* So */
244: { NULL, 0, 0 }, /* Sq */
245: { NULL, 0, 0 }, /* Sm */
246: { NULL, 0, 0 }, /* Sx */
247: { NULL, TYPE_Sy, MDOCF_CHILD }, /* Sy */
248: { NULL, TYPE_Tn, MDOCF_CHILD }, /* Tn */
249: { NULL, 0, 0 }, /* Ux */
250: { NULL, 0, 0 }, /* Xc */
251: { NULL, 0, 0 }, /* Xo */
252: { pmdoc_head, TYPE_Fn, 0 }, /* Fo */
253: { NULL, 0, 0 }, /* Fc */
254: { NULL, 0, 0 }, /* Oo */
255: { NULL, 0, 0 }, /* Oc */
256: { NULL, 0, 0 }, /* Bk */
257: { NULL, 0, 0 }, /* Ek */
258: { NULL, 0, 0 }, /* Bt */
259: { NULL, 0, 0 }, /* Hf */
260: { NULL, 0, 0 }, /* Fr */
261: { NULL, 0, 0 }, /* Ud */
262: { NULL, TYPE_Lb, MDOCF_CHILD }, /* Lb */
263: { NULL, 0, 0 }, /* Lp */
264: { NULL, TYPE_Lk, MDOCF_CHILD }, /* Lk */
265: { NULL, TYPE_Mt, MDOCF_CHILD }, /* Mt */
266: { NULL, 0, 0 }, /* Brq */
267: { NULL, 0, 0 }, /* Bro */
268: { NULL, 0, 0 }, /* Brc */
269: { NULL, 0, 0 }, /* %C */
270: { NULL, 0, 0 }, /* Es */
271: { NULL, 0, 0 }, /* En */
272: { NULL, TYPE_Dx, MDOCF_CHILD }, /* Dx */
273: { NULL, 0, 0 }, /* %Q */
274: { NULL, 0, 0 }, /* br */
275: { NULL, 0, 0 }, /* sp */
276: { NULL, 0, 0 }, /* %U */
277: { NULL, 0, 0 }, /* Ta */
1.1 kristaps 278: };
279:
static	const char	 *progname; /* basename of argv[0], for usage */
static	int		  use_all; /* process files even when they mismatch */
static	int		  verb; /* verbosity, bumped once per -v */
static	int		  warnings; /* report potential problems in manuals */
1.1 kristaps 284:
285: int
286: main(int argc, char *argv[])
287: {
288: struct mparse *mp; /* parse sequence */
1.10 kristaps 289: struct manpaths dirs;
1.38 ! schwarze 290: struct mdb mdb;
! 291: struct recs recs;
1.1 kristaps 292: enum op op; /* current operation */
1.5 kristaps 293: const char *dir;
1.26 kristaps 294: char *cp;
1.38 ! schwarze 295: char pbuf[PATH_MAX];
1.16 schwarze 296: int ch, i, flags;
1.38 ! schwarze 297: DB *hash; /* temporary keyword hashtable */
1.1 kristaps 298: BTREEINFO info; /* btree configuration */
1.38 ! schwarze 299: size_t sz1, sz2;
1.1 kristaps 300: struct buf buf, /* keyword buffer */
301: dbuf; /* description buffer */
1.5 kristaps 302: struct of *of; /* list of files for processing */
1.1 kristaps 303: extern int optind;
304: extern char *optarg;
305:
306: progname = strrchr(argv[0], '/');
307: if (progname == NULL)
308: progname = argv[0];
309: else
310: ++progname;
311:
1.10 kristaps 312: memset(&dirs, 0, sizeof(struct manpaths));
1.38 ! schwarze 313: memset(&mdb, 0, sizeof(struct mdb));
! 314: memset(&recs, 0, sizeof(struct recs));
1.10 kristaps 315:
1.4 kristaps 316: of = NULL;
1.1 kristaps 317: mp = NULL;
318: hash = NULL;
1.38 ! schwarze 319: op = OP_DEFAULT;
1.5 kristaps 320: dir = NULL;
1.1 kristaps 321:
1.38 ! schwarze 322: while (-1 != (ch = getopt(argc, argv, "aC:d:tu:vW")))
1.1 kristaps 323: switch (ch) {
1.12 schwarze 324: case ('a'):
325: use_all = 1;
326: break;
1.34 schwarze 327: case ('C'):
1.38 ! schwarze 328: if (op) {
! 329: fprintf(stderr,
! 330: "-C: conflicting options\n");
! 331: goto usage;
! 332: }
! 333: dir = optarg;
! 334: op = OP_CONFFILE;
1.34 schwarze 335: break;
1.5 kristaps 336: case ('d'):
1.38 ! schwarze 337: if (op) {
! 338: fprintf(stderr,
! 339: "-d: conflicting options\n");
! 340: goto usage;
! 341: }
1.5 kristaps 342: dir = optarg;
343: op = OP_UPDATE;
344: break;
1.38 ! schwarze 345: case ('t'):
! 346: dup2(STDOUT_FILENO, STDERR_FILENO);
! 347: if (op) {
! 348: fprintf(stderr,
! 349: "-t: conflicting options\n");
! 350: goto usage;
! 351: }
! 352: op = OP_TEST;
! 353: use_all = 1;
! 354: warnings = 1;
! 355: break;
1.5 kristaps 356: case ('u'):
1.38 ! schwarze 357: if (op) {
! 358: fprintf(stderr,
! 359: "-u: conflicting options\n");
! 360: goto usage;
! 361: }
1.5 kristaps 362: dir = optarg;
363: op = OP_DELETE;
364: break;
365: case ('v'):
366: verb++;
367: break;
1.38 ! schwarze 368: case ('W'):
! 369: warnings = 1;
! 370: break;
1.1 kristaps 371: default:
1.38 ! schwarze 372: goto usage;
1.1 kristaps 373: }
374:
375: argc -= optind;
376: argv += optind;
377:
1.38 ! schwarze 378: if (OP_CONFFILE == op && argc > 0) {
! 379: fprintf(stderr, "-C: too many arguments\n");
! 380: goto usage;
! 381: }
! 382:
1.4 kristaps 383: memset(&info, 0, sizeof(BTREEINFO));
384: info.flags = R_DUP;
1.1 kristaps 385:
1.4 kristaps 386: mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
1.1 kristaps 387:
1.5 kristaps 388: memset(&buf, 0, sizeof(struct buf));
389: memset(&dbuf, 0, sizeof(struct buf));
1.1 kristaps 390:
1.4 kristaps 391: buf.size = dbuf.size = MANDOC_BUFSZ;
1.1 kristaps 392:
1.4 kristaps 393: buf.cp = mandoc_malloc(buf.size);
394: dbuf.cp = mandoc_malloc(dbuf.size);
1.1 kristaps 395:
1.38 ! schwarze 396: flags = O_CREAT | O_RDWR;
! 397: if (OP_DEFAULT == op || OP_CONFFILE == op)
! 398: flags |= O_TRUNC;
! 399:
! 400: if (OP_TEST == op) {
! 401: ofile_argbuild(argc, argv, &of);
! 402: if (NULL == of)
! 403: goto out;
! 404: index_merge(of, mp, &dbuf, &buf, hash, &mdb, &recs);
! 405: goto out;
! 406: }
1.5 kristaps 407:
408: if (OP_UPDATE == op || OP_DELETE == op) {
1.38 ! schwarze 409: strlcat(mdb.dbn, dir, MAXPATHLEN);
! 410: strlcat(mdb.dbn, "/", MAXPATHLEN);
! 411: sz1 = strlcat(mdb.dbn, MANDOC_DB, MAXPATHLEN);
! 412:
! 413: strlcat(mdb.idxn, dir, MAXPATHLEN);
! 414: strlcat(mdb.idxn, "/", MAXPATHLEN);
! 415: sz2 = strlcat(mdb.idxn, MANDOC_IDX, MAXPATHLEN);
1.5 kristaps 416:
417: if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
1.38 ! schwarze 418: fprintf(stderr, "%s: path too long\n", dir);
1.5 kristaps 419: exit((int)MANDOCLEVEL_BADARG);
420: }
421:
1.38 ! schwarze 422: mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info);
! 423: mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL);
1.5 kristaps 424:
1.38 ! schwarze 425: if (NULL == mdb.db) {
! 426: perror(mdb.dbn);
1.5 kristaps 427: exit((int)MANDOCLEVEL_SYSERR);
1.38 ! schwarze 428: } else if (NULL == mdb.idx) {
! 429: perror(mdb.idxn);
1.5 kristaps 430: exit((int)MANDOCLEVEL_SYSERR);
431: }
432:
1.35 kristaps 433: ofile_argbuild(argc, argv, &of);
1.5 kristaps 434:
435: if (NULL == of)
436: goto out;
437:
1.38 ! schwarze 438: index_prune(of, &mdb, &recs);
1.5 kristaps 439:
1.17 schwarze 440: /*
1.35 kristaps 441: * Go to the root of the respective manual tree.
442: * This must work or no manuals may be found (they're
443: * indexed relative to the root).
1.17 schwarze 444: */
445:
446: if (OP_UPDATE == op) {
1.35 kristaps 447: if (-1 == chdir(dir)) {
448: perror(dir);
449: exit((int)MANDOCLEVEL_SYSERR);
450: }
1.13 schwarze 451: index_merge(of, mp, &dbuf, &buf, hash,
1.38 ! schwarze 452: &mdb, &recs);
1.17 schwarze 453: }
1.5 kristaps 454:
455: goto out;
456: }
457:
1.10 kristaps 458: /*
459: * Configure the directories we're going to scan.
460: * If we have command-line arguments, use them.
461: * If not, we use man(1)'s method (see mandocdb.8).
462: */
463:
464: if (argc > 0) {
1.26 kristaps 465: dirs.paths = mandoc_calloc(argc, sizeof(char *));
1.10 kristaps 466: dirs.sz = argc;
1.26 kristaps 467: for (i = 0; i < argc; i++) {
468: if (NULL == (cp = realpath(argv[i], pbuf))) {
469: perror(argv[i]);
470: goto out;
471: }
472: dirs.paths[i] = mandoc_strdup(cp);
473: }
1.10 kristaps 474: } else
1.38 ! schwarze 475: manpath_parse(&dirs, dir, NULL, NULL);
1.10 kristaps 476:
477: for (i = 0; i < dirs.sz; i++) {
1.38 ! schwarze 478: mdb.idxn[0] = mdb.dbn[0] = '\0';
1.1 kristaps 479:
1.38 ! schwarze 480: strlcat(mdb.dbn, dirs.paths[i], MAXPATHLEN);
! 481: strlcat(mdb.dbn, "/", MAXPATHLEN);
! 482: sz1 = strlcat(mdb.dbn, MANDOC_DB, MAXPATHLEN);
! 483:
! 484: strlcat(mdb.idxn, dirs.paths[i], MAXPATHLEN);
! 485: strlcat(mdb.idxn, "/", MAXPATHLEN);
! 486: sz2 = strlcat(mdb.idxn, MANDOC_IDX, MAXPATHLEN);
1.1 kristaps 487:
1.5 kristaps 488: if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
1.38 ! schwarze 489: fprintf(stderr, "%s: path too long\n",
1.10 kristaps 490: dirs.paths[i]);
1.5 kristaps 491: exit((int)MANDOCLEVEL_BADARG);
1.4 kristaps 492: }
1.3 kristaps 493:
1.38 ! schwarze 494: if (mdb.db)
! 495: (*mdb.db->close)(mdb.db);
! 496: if (mdb.idx)
! 497: (*mdb.idx->close)(mdb.idx);
1.13 schwarze 498:
1.38 ! schwarze 499: mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info);
! 500: mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL);
1.3 kristaps 501:
1.38 ! schwarze 502: if (NULL == mdb.db) {
! 503: perror(mdb.dbn);
1.5 kristaps 504: exit((int)MANDOCLEVEL_SYSERR);
1.38 ! schwarze 505: } else if (NULL == mdb.idx) {
! 506: perror(mdb.idxn);
1.5 kristaps 507: exit((int)MANDOCLEVEL_SYSERR);
508: }
509:
1.4 kristaps 510: ofile_free(of);
511: of = NULL;
1.1 kristaps 512:
1.35 kristaps 513: if (-1 == chdir(dirs.paths[i])) {
514: perror(dirs.paths[i]);
1.5 kristaps 515: exit((int)MANDOCLEVEL_SYSERR);
1.38 ! schwarze 516: }
1.1 kristaps 517:
1.38 ! schwarze 518: ofile_dirbuild(".", "", "", 0, &of);
1.5 kristaps 519: if (NULL == of)
520: continue;
1.1 kristaps 521:
1.17 schwarze 522: /*
1.38 ! schwarze 523: * Go to the root of the respective manual tree.
1.35 kristaps 524: * This must work or no manuals may be found (they're
525: * indexed relative to the root).
1.17 schwarze 526: */
527:
1.35 kristaps 528: if (-1 == chdir(dirs.paths[i])) {
529: perror(dirs.paths[i]);
530: exit((int)MANDOCLEVEL_SYSERR);
531: }
532:
1.38 ! schwarze 533: index_merge(of, mp, &dbuf, &buf, hash, &mdb, &recs);
1.4 kristaps 534: }
1.3 kristaps 535:
1.5 kristaps 536: out:
1.38 ! schwarze 537: if (mdb.db)
! 538: (*mdb.db->close)(mdb.db);
! 539: if (mdb.idx)
! 540: (*mdb.idx->close)(mdb.idx);
1.3 kristaps 541: if (hash)
542: (*hash->close)(hash);
543: if (mp)
544: mparse_free(mp);
545:
1.10 kristaps 546: manpath_free(&dirs);
1.4 kristaps 547: ofile_free(of);
1.3 kristaps 548: free(buf.cp);
549: free(dbuf.cp);
1.38 ! schwarze 550: free(recs.stack);
1.3 kristaps 551:
1.5 kristaps 552: return(MANDOCLEVEL_OK);
1.38 ! schwarze 553:
! 554: usage:
! 555: fprintf(stderr,
! 556: "usage: %s [-avvv] [-C file] | dir ... | -t file ...\n"
! 557: " -d dir [file ...] | "
! 558: "-u dir [file ...]\n",
! 559: progname);
! 560:
! 561: return((int)MANDOCLEVEL_BADARG);
1.3 kristaps 562: }
563:
564: void
565: index_merge(const struct of *of, struct mparse *mp,
1.16 schwarze 566: struct buf *dbuf, struct buf *buf, DB *hash,
1.38 ! schwarze 567: struct mdb *mdb, struct recs *recs)
1.3 kristaps 568: {
569: recno_t rec;
1.38 ! schwarze 570: int ch, skip;
1.3 kristaps 571: DBT key, val;
572: struct mdoc *mdoc;
573: struct man *man;
1.38 ! schwarze 574: const char *fn, *msec, *march, *mtitle;
1.37 schwarze 575: uint64_t mask;
1.3 kristaps 576: size_t sv;
577: unsigned seq;
1.9 kristaps 578: struct db_val vbuf;
1.36 kristaps 579: char type;
1.3 kristaps 580:
1.38 ! schwarze 581: rec = 0;
! 582: for (of = of->first; of; of = of->next) {
1.3 kristaps 583: fn = of->fname;
1.14 schwarze 584:
585: /*
1.33 schwarze 586: * Try interpreting the file as mdoc(7) or man(7)
587: * source code, unless it is already known to be
588: * formatted. Fall back to formatted mode.
1.14 schwarze 589: */
590:
1.1 kristaps 591: mparse_reset(mp);
1.14 schwarze 592: mdoc = NULL;
593: man = NULL;
1.1 kristaps 594:
1.14 schwarze 595: if ((MANDOC_SRC & of->src_form ||
596: ! (MANDOC_FORM & of->src_form)) &&
597: MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn))
598: mparse_result(mp, &mdoc, &man);
599:
600: if (NULL != mdoc) {
601: msec = mdoc_meta(mdoc)->msec;
1.38 ! schwarze 602: march = mdoc_meta(mdoc)->arch;
! 603: if (NULL == march)
! 604: march = "";
1.14 schwarze 605: mtitle = mdoc_meta(mdoc)->title;
606: } else if (NULL != man) {
607: msec = man_meta(man)->msec;
1.38 ! schwarze 608: march = "";
1.14 schwarze 609: mtitle = man_meta(man)->title;
610: } else {
611: msec = of->sec;
1.38 ! schwarze 612: march = of->arch;
1.14 schwarze 613: mtitle = of->title;
1.1 kristaps 614: }
615:
1.12 schwarze 616: /*
617: * By default, skip a file if the manual section
618: * and architecture given in the file disagree
619: * with the directory where the file is located.
620: */
621:
1.38 ! schwarze 622: skip = 0;
! 623: assert(of->sec);
! 624: assert(msec);
! 625: if (strcasecmp(msec, of->sec)) {
! 626: if (warnings)
! 627: fprintf(stderr, "%s: "
! 628: "section \"%s\" manual "
! 629: "in \"%s\" directory\n",
! 630: fn, msec, of->sec);
! 631: skip = 1;
! 632: }
! 633:
! 634: assert(of->arch);
! 635: assert(march);
! 636: if (strcasecmp(march, of->arch)) {
! 637: if (warnings)
! 638: fprintf(stderr, "%s: "
! 639: "architecture \"%s\" manual "
! 640: "in \"%s\" directory\n",
! 641: fn, march, of->arch);
! 642: skip = 1;
1.12 schwarze 643: }
644:
1.38 ! schwarze 645: /*
1.12 schwarze 646: * By default, skip a file if the title given
647: * in the file disagrees with the file name.
648: * If both agree, use the file name as the title,
649: * because the one in the file usually is all caps.
650: */
651:
652: assert(of->title);
653: assert(mtitle);
1.38 ! schwarze 654: if (strcasecmp(mtitle, of->title)) {
! 655: if (warnings)
! 656: fprintf(stderr, "%s: "
! 657: "title \"%s\" in file "
! 658: "but \"%s\" in filename\n",
! 659: fn, mtitle, of->title);
! 660: skip = 1;
! 661: } else
! 662: mtitle = of->title;
1.12 schwarze 663:
1.38 ! schwarze 664: if (skip && !use_all)
1.12 schwarze 665: continue;
666:
1.38 ! schwarze 667: /*
1.1 kristaps 668: * The index record value consists of a nil-terminated
669: * filename, a nil-terminated manual section, and a
670: * nil-terminated description. Since the description
671: * may not be set, we set a sentinel to see if we're
672: * going to write a nil byte in its place.
673: */
674:
1.3 kristaps 675: dbuf->len = 0;
1.36 kristaps 676: type = mdoc ? 'd' : (man ? 'a' : 'c');
677: buf_appendb(dbuf, &type, 1);
1.3 kristaps 678: buf_appendb(dbuf, fn, strlen(fn) + 1);
679: buf_appendb(dbuf, msec, strlen(msec) + 1);
680: buf_appendb(dbuf, mtitle, strlen(mtitle) + 1);
1.38 ! schwarze 681: buf_appendb(dbuf, march, strlen(march) + 1);
1.1 kristaps 682:
1.3 kristaps 683: sv = dbuf->len;
1.1 kristaps 684:
1.33 schwarze 685: /*
686: * Collect keyword/mask pairs.
687: * Each pair will become a new btree node.
688: */
1.1 kristaps 689:
1.33 schwarze 690: hash_reset(&hash);
1.1 kristaps 691: if (mdoc)
1.3 kristaps 692: pmdoc_node(hash, buf, dbuf,
1.1 kristaps 693: mdoc_node(mdoc), mdoc_meta(mdoc));
1.14 schwarze 694: else if (man)
1.3 kristaps 695: pman_node(hash, buf, dbuf, man_node(man));
1.14 schwarze 696: else
697: pformatted(hash, buf, dbuf, of);
1.1 kristaps 698:
1.38 ! schwarze 699: /* Test mode, do not access any database. */
! 700:
! 701: if (NULL == mdb->db || NULL == mdb->idx)
! 702: continue;
! 703:
1.1 kristaps 704: /*
1.33 schwarze 705: * Reclaim an empty index record, if available.
706: * Use its record number for all new btree nodes.
1.1 kristaps 707: */
708:
1.38 ! schwarze 709: if (recs->cur > 0) {
! 710: recs->cur--;
! 711: rec = recs->stack[(int)recs->cur];
! 712: } else if (recs->last > 0) {
! 713: rec = recs->last;
! 714: recs->last = 0;
1.33 schwarze 715: } else
716: rec++;
1.20 kristaps 717: vbuf.rec = htobe32(rec);
1.33 schwarze 718:
719: /*
720: * Copy from the in-memory hashtable of pending
721: * keyword/mask pairs into the database.
722: */
723:
1.1 kristaps 724: seq = R_FIRST;
725: while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
726: seq = R_NEXT;
1.37 schwarze 727: assert(sizeof(uint64_t) == val.size);
728: memcpy(&mask, val.data, val.size);
729: vbuf.mask = htobe64(mask);
1.9 kristaps 730: val.size = sizeof(struct db_val);
731: val.data = &vbuf;
1.38 ! schwarze 732: dbt_put(mdb->db, mdb->dbn, &key, &val);
1.1 kristaps 733: }
734: if (ch < 0) {
735: perror("hash");
736: exit((int)MANDOCLEVEL_SYSERR);
737: }
1.38 ! schwarze 738:
1.1 kristaps 739: /*
740: * Apply to the index. If we haven't had a description
741: * set, put an empty one in now.
742: */
743:
1.3 kristaps 744: if (dbuf->len == sv)
745: buf_appendb(dbuf, "", 1);
1.1 kristaps 746:
747: key.data = &rec;
748: key.size = sizeof(recno_t);
749:
1.3 kristaps 750: val.data = dbuf->cp;
751: val.size = dbuf->len;
1.1 kristaps 752:
1.5 kristaps 753: if (verb)
1.38 ! schwarze 754: printf("%s: adding to index\n", fn);
1.18 kristaps 755:
1.38 ! schwarze 756: dbt_put(mdb->idx, mdb->idxn, &key, &val);
1.3 kristaps 757: }
758: }
759:
760: /*
761: * Scan through all entries in the index file `idx' and prune those
762: * entries in `ofile'.
763: * Pruning consists of removing from `db', then invalidating the entry
764: * in `idx' (zeroing its value size).
765: */
766: static void
1.38 ! schwarze 767: index_prune(const struct of *ofile, struct mdb *mdb, struct recs *recs)
1.3 kristaps 768: {
769: const struct of *of;
1.36 kristaps 770: const char *fn;
1.9 kristaps 771: struct db_val *vbuf;
1.3 kristaps 772: unsigned seq, sseq;
773: DBT key, val;
774: int ch;
775:
1.38 ! schwarze 776: recs->cur = 0;
1.3 kristaps 777: seq = R_FIRST;
1.38 ! schwarze 778: while (0 == (ch = (*mdb->idx->seq)(mdb->idx, &key, &val, seq))) {
1.3 kristaps 779: seq = R_NEXT;
1.37 schwarze 780: assert(sizeof(recno_t) == key.size);
1.38 ! schwarze 781: memcpy(&recs->last, key.data, key.size);
1.18 kristaps 782:
783: /* Deleted records are zero-sized. Skip them. */
784:
785: if (0 == val.size)
786: goto cont;
787:
788: /*
789: * Make sure we're sane.
790: * Read past our mdoc/man/cat type to the next string,
791: * then make sure it's bounded by a NUL.
792: * Failing any of these, we go into our error handler.
793: */
794:
1.36 kristaps 795: fn = (char *)val.data + 1;
796: if (NULL == memchr(fn, '\0', val.size - 1))
1.18 kristaps 797: break;
798:
1.38 ! schwarze 799: /*
1.18 kristaps 800: * Search for the file in those we care about.
801: * XXX: build this into a tree. Too slow.
802: */
1.3 kristaps 803:
1.38 ! schwarze 804: for (of = ofile->first; of; of = of->next)
1.3 kristaps 805: if (0 == strcmp(fn, of->fname))
806: break;
807:
808: if (NULL == of)
809: continue;
810:
1.18 kristaps 811: /*
812: * Search through the keyword database, throwing out all
813: * references to our file.
814: */
815:
1.3 kristaps 816: sseq = R_FIRST;
1.38 ! schwarze 817: while (0 == (ch = (*mdb->db->seq)(mdb->db,
! 818: &key, &val, sseq))) {
1.3 kristaps 819: sseq = R_NEXT;
1.18 kristaps 820: if (sizeof(struct db_val) != val.size)
821: break;
822:
1.9 kristaps 823: vbuf = val.data;
1.38 ! schwarze 824: if (recs->last != betoh32(vbuf->rec))
1.3 kristaps 825: continue;
1.18 kristaps 826:
1.38 ! schwarze 827: if ((ch = (*mdb->db->del)(mdb->db,
! 828: &key, R_CURSOR)) < 0)
1.3 kristaps 829: break;
830: }
1.18 kristaps 831:
1.3 kristaps 832: if (ch < 0) {
1.38 ! schwarze 833: perror(mdb->dbn);
1.3 kristaps 834: exit((int)MANDOCLEVEL_SYSERR);
1.18 kristaps 835: } else if (1 != ch) {
1.38 ! schwarze 836: fprintf(stderr, "%s: corrupt database\n",
! 837: mdb->dbn);
1.18 kristaps 838: exit((int)MANDOCLEVEL_SYSERR);
1.3 kristaps 839: }
1.1 kristaps 840:
1.5 kristaps 841: if (verb)
1.38 ! schwarze 842: printf("%s: deleting from index\n", fn);
1.1 kristaps 843:
1.3 kristaps 844: val.size = 0;
1.38 ! schwarze 845: ch = (*mdb->idx->put)(mdb->idx, &key, &val, R_CURSOR);
1.1 kristaps 846:
1.18 kristaps 847: if (ch < 0)
848: break;
849: cont:
1.38 ! schwarze 850: if (recs->cur >= recs->size) {
! 851: recs->size += MANDOC_SLOP;
! 852: recs->stack = mandoc_realloc(recs->stack,
! 853: recs->size * sizeof(recno_t));
1.3 kristaps 854: }
1.1 kristaps 855:
1.38 ! schwarze 856: recs->stack[(int)recs->cur] = recs->last;
! 857: recs->cur++;
1.3 kristaps 858: }
1.18 kristaps 859:
860: if (ch < 0) {
1.38 ! schwarze 861: perror(mdb->idxn);
1.18 kristaps 862: exit((int)MANDOCLEVEL_SYSERR);
863: } else if (1 != ch) {
1.38 ! schwarze 864: fprintf(stderr, "%s: corrupt index\n", mdb->idxn);
1.18 kristaps 865: exit((int)MANDOCLEVEL_SYSERR);
866: }
867:
1.38 ! schwarze 868: recs->last++;
1.1 kristaps 869: }
870:
871: /*
872: * Grow the buffer (if necessary) and copy in a binary string.
873: */
874: static void
875: buf_appendb(struct buf *buf, const void *cp, size_t sz)
876: {
877:
878: /* Overshoot by MANDOC_BUFSZ. */
879:
880: while (buf->len + sz >= buf->size) {
881: buf->size = buf->len + sz + MANDOC_BUFSZ;
882: buf->cp = mandoc_realloc(buf->cp, buf->size);
883: }
884:
885: memcpy(buf->cp + (int)buf->len, cp, sz);
886: buf->len += sz;
887: }
888:
889: /*
890: * Append a nil-terminated string to the buffer.
891: * This can be invoked multiple times.
892: * The buffer string will be nil-terminated.
893: * If invoked multiple times, a space is put between strings.
894: */
895: static void
896: buf_append(struct buf *buf, const char *cp)
897: {
898: size_t sz;
899:
900: if (0 == (sz = strlen(cp)))
901: return;
902:
903: if (buf->len)
904: buf->cp[(int)buf->len - 1] = ' ';
905:
906: buf_appendb(buf, cp, sz + 1);
907: }
908:
909: /*
910: * Recursively add all text from a given node.
911: * This is optimised for general mdoc nodes in this context, which do
912: * not consist of subexpressions and having a recursive call for n->next
913: * would be wasteful.
914: * The "f" variable should be 0 unless called from pmdoc_Nd for the
915: * description buffer, which does not start at the beginning of the
916: * buffer.
917: */
918: static void
919: buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f)
920: {
921:
922: for ( ; n; n = n->next) {
923: if (n->child)
924: buf_appendmdoc(buf, n->child, f);
925:
926: if (MDOC_TEXT == n->type && f) {
927: f = 0;
928: buf_appendb(buf, n->string,
929: strlen(n->string) + 1);
930: } else if (MDOC_TEXT == n->type)
931: buf_append(buf, n->string);
932:
933: }
934: }
935:
936: static void
937: hash_reset(DB **db)
938: {
939: DB *hash;
940:
941: if (NULL != (hash = *db))
942: (*hash->close)(hash);
943:
1.5 kristaps 944: *db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL);
1.1 kristaps 945: if (NULL == *db) {
946: perror("hash");
947: exit((int)MANDOCLEVEL_SYSERR);
948: }
949: }
950:
951: /* ARGSUSED */
1.25 schwarze 952: static int
953: pmdoc_head(MDOC_ARGS)
954: {
955:
956: return(MDOC_HEAD == n->type);
957: }
958:
959: /* ARGSUSED */
960: static int
961: pmdoc_body(MDOC_ARGS)
962: {
963:
964: return(MDOC_BODY == n->type);
965: }
966:
967: /* ARGSUSED */
968: static int
1.1 kristaps 969: pmdoc_Fd(MDOC_ARGS)
970: {
971: const char *start, *end;
972: size_t sz;
1.25 schwarze 973:
1.1 kristaps 974: if (SEC_SYNOPSIS != n->sec)
1.25 schwarze 975: return(0);
1.1 kristaps 976: if (NULL == (n = n->child) || MDOC_TEXT != n->type)
1.25 schwarze 977: return(0);
1.1 kristaps 978:
979: /*
980: * Only consider those `Fd' macro fields that begin with an
981: * "inclusion" token (versus, e.g., #define).
982: */
983: if (strcmp("#include", n->string))
1.25 schwarze 984: return(0);
1.1 kristaps 985:
986: if (NULL == (n = n->next) || MDOC_TEXT != n->type)
1.25 schwarze 987: return(0);
1.1 kristaps 988:
989: /*
990: * Strip away the enclosing angle brackets and make sure we're
991: * not zero-length.
992: */
993:
994: start = n->string;
995: if ('<' == *start || '"' == *start)
996: start++;
997:
998: if (0 == (sz = strlen(start)))
1.25 schwarze 999: return(0);
1.1 kristaps 1000:
1001: end = &start[(int)sz - 1];
1002: if ('>' == *end || '"' == *end)
1003: end--;
1004:
1005: assert(end >= start);
1006:
1007: buf_appendb(buf, start, (size_t)(end - start + 1));
1008: buf_appendb(buf, "", 1);
1.25 schwarze 1009: return(1);
1.1 kristaps 1010: }
1011:
1012: /* ARGSUSED */
1.25 schwarze 1013: static int
1014: pmdoc_In(MDOC_ARGS)
1.1 kristaps 1015: {
1016:
1017: if (NULL == n->child || MDOC_TEXT != n->child->type)
1.25 schwarze 1018: return(0);
1.1 kristaps 1019:
1020: buf_append(buf, n->child->string);
1.25 schwarze 1021: return(1);
1.1 kristaps 1022: }
1023:
1024: /* ARGSUSED */
1.25 schwarze 1025: static int
1.1 kristaps 1026: pmdoc_Fn(MDOC_ARGS)
1027: {
1.25 schwarze 1028: struct mdoc_node *nn;
1.1 kristaps 1029: const char *cp;
1030:
1.25 schwarze 1031: nn = n->child;
1032:
1033: if (NULL == nn || MDOC_TEXT != nn->type)
1034: return(0);
1035:
1036: /* .Fn "struct type *name" "char *arg" */
1.1 kristaps 1037:
1.25 schwarze 1038: cp = strrchr(nn->string, ' ');
1.1 kristaps 1039: if (NULL == cp)
1.25 schwarze 1040: cp = nn->string;
1.1 kristaps 1041:
1042: /* Strip away pointer symbol. */
1043:
1044: while ('*' == *cp)
1045: cp++;
1046:
1.25 schwarze 1047: /* Store the function name. */
1048:
1.1 kristaps 1049: buf_append(buf, cp);
1.8 schwarze 1050: hash_put(hash, buf, TYPE_Fn);
1.25 schwarze 1051:
1052: /* Store the function type. */
1053:
1054: if (nn->string < cp) {
1055: buf->len = 0;
1056: buf_appendb(buf, nn->string, cp - nn->string);
1057: buf_appendb(buf, "", 1);
1058: hash_put(hash, buf, TYPE_Ft);
1059: }
1060:
1061: /* Store the arguments. */
1062:
1063: for (nn = nn->next; nn; nn = nn->next) {
1064: if (MDOC_TEXT != nn->type)
1065: continue;
1066: buf->len = 0;
1067: buf_append(buf, nn->string);
1068: hash_put(hash, buf, TYPE_Fa);
1069: }
1070:
1071: return(0);
1.1 kristaps 1072: }
1073:
1074: /* ARGSUSED */
1.25 schwarze 1075: static int
1.1 kristaps 1076: pmdoc_St(MDOC_ARGS)
1077: {
1.25 schwarze 1078:
1.1 kristaps 1079: if (NULL == n->child || MDOC_TEXT != n->child->type)
1.25 schwarze 1080: return(0);
1.1 kristaps 1081:
1082: buf_append(buf, n->child->string);
1.25 schwarze 1083: return(1);
1.1 kristaps 1084: }
1085:
1086: /* ARGSUSED */
1.25 schwarze 1087: static int
1.1 kristaps 1088: pmdoc_Xr(MDOC_ARGS)
1089: {
1090:
1091: if (NULL == (n = n->child))
1.25 schwarze 1092: return(0);
1.1 kristaps 1093:
1094: buf_appendb(buf, n->string, strlen(n->string));
1095:
1096: if (NULL != (n = n->next)) {
1097: buf_appendb(buf, ".", 1);
1098: buf_appendb(buf, n->string, strlen(n->string) + 1);
1099: } else
1100: buf_appendb(buf, ".", 2);
1101:
1.25 schwarze 1102: return(1);
1.1 kristaps 1103: }
1104:
1105: /* ARGSUSED */
1.25 schwarze 1106: static int
1.1 kristaps 1107: pmdoc_Nd(MDOC_ARGS)
1108: {
1109:
1110: if (MDOC_BODY != n->type)
1.25 schwarze 1111: return(0);
1.1 kristaps 1112:
1113: buf_appendmdoc(dbuf, n->child, 1);
1.25 schwarze 1114: return(1);
1.1 kristaps 1115: }
1116:
1117: /* ARGSUSED */
1.25 schwarze 1118: static int
1119: pmdoc_Nm(MDOC_ARGS)
1.1 kristaps 1120: {
1121:
1.25 schwarze 1122: if (SEC_NAME == n->sec)
1123: return(1);
1124: else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
1125: return(0);
1.1 kristaps 1126:
1.25 schwarze 1127: if (NULL == n->child)
1128: buf_append(buf, m->name);
1.1 kristaps 1129:
1.25 schwarze 1130: return(1);
1.1 kristaps 1131: }
1132:
1133: /* ARGSUSED */
1.25 schwarze 1134: static int
1135: pmdoc_Sh(MDOC_ARGS)
1.1 kristaps 1136: {
1137:
1.25 schwarze 1138: return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type);
1.1 kristaps 1139: }
1140:
1141: static void
1.9 kristaps 1142: hash_put(DB *db, const struct buf *buf, uint64_t mask)
1.1 kristaps 1143: {
1.37 schwarze 1144: uint64_t oldmask;
1.1 kristaps 1145: DBT key, val;
1146: int rc;
1147:
1148: if (buf->len < 2)
1149: return;
1150:
1151: key.data = buf->cp;
1152: key.size = buf->len;
1153:
1154: if ((rc = (*db->get)(db, &key, &val, 0)) < 0) {
1155: perror("hash");
1156: exit((int)MANDOCLEVEL_SYSERR);
1.37 schwarze 1157: } else if (0 == rc) {
1158: assert(sizeof(uint64_t) == val.size);
1159: memcpy(&oldmask, val.data, val.size);
1160: mask |= oldmask;
1161: }
1.1 kristaps 1162:
1163: val.data = &mask;
1.9 kristaps 1164: val.size = sizeof(uint64_t);
1.1 kristaps 1165:
1166: if ((rc = (*db->put)(db, &key, &val, 0)) < 0) {
1167: perror("hash");
1168: exit((int)MANDOCLEVEL_SYSERR);
1169: }
1170: }
1171:
1172: static void
1173: dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
1174: {
1175:
1176: assert(key->size);
1177: assert(val->size);
1178:
1179: if (0 == (*db->put)(db, key, val, 0))
1180: return;
1181:
1182: perror(dbn);
1183: exit((int)MANDOCLEVEL_SYSERR);
1184: /* NOTREACHED */
1185: }
1186:
1187: /*
1188: * Call out to per-macro handlers after clearing the persistent database
1189: * key. If the macro sets the database key, flush it to the database.
1190: */
1191: static void
1192: pmdoc_node(MDOC_ARGS)
1193: {
1194:
1195: if (NULL == n)
1196: return;
1197:
1198: switch (n->type) {
1199: case (MDOC_HEAD):
1200: /* FALLTHROUGH */
1201: case (MDOC_BODY):
1202: /* FALLTHROUGH */
1203: case (MDOC_TAIL):
1204: /* FALLTHROUGH */
1205: case (MDOC_BLOCK):
1206: /* FALLTHROUGH */
1207: case (MDOC_ELEM):
1.25 schwarze 1208: buf->len = 0;
1209:
1210: /*
1211: * Both NULL handlers and handlers returning true
1212: * request using the data. Only skip the element
1213: * when the handler returns false.
1214: */
1215:
1216: if (NULL != mdocs[n->tok].fp &&
1217: 0 == (*mdocs[n->tok].fp)(hash, buf, dbuf, n, m))
1.1 kristaps 1218: break;
1219:
1.25 schwarze 1220: /*
1221: * For many macros, use the text from all children.
1222: * Set zero flags for macros not needing this.
1223: * In that case, the handler must fill the buffer.
1224: */
1225:
1226: if (MDOCF_CHILD & mdocs[n->tok].flags)
1227: buf_appendmdoc(buf, n->child, 0);
1228:
1229: /*
1230: * Cover the most common case:
1231: * Automatically stage one string per element.
1232: * Set a zero mask for macros not needing this.
1233: * Additional staging can be done in the handler.
1234: */
1235:
1236: if (mdocs[n->tok].mask)
1237: hash_put(hash, buf, mdocs[n->tok].mask);
1.1 kristaps 1238: break;
1239: default:
1240: break;
1241: }
1242:
1243: pmdoc_node(hash, buf, dbuf, n->child, m);
1244: pmdoc_node(hash, buf, dbuf, n->next, m);
1245: }
1246:
1247: static int
1248: pman_node(MAN_ARGS)
1249: {
1250: const struct man_node *head, *body;
1251: const char *start, *sv;
1252: size_t sz;
1253:
1254: if (NULL == n)
1255: return(0);
1256:
1257: /*
1258: * We're only searching for one thing: the first text child in
1259: * the BODY of a NAME section. Since we don't keep track of
1260: * sections in -man, run some hoops to find out whether we're in
1261: * the correct section or not.
1262: */
1263:
1264: if (MAN_BODY == n->type && MAN_SH == n->tok) {
1265: body = n;
1266: assert(body->parent);
1267: if (NULL != (head = body->parent->head) &&
1268: 1 == head->nchild &&
1269: NULL != (head = (head->child)) &&
1270: MAN_TEXT == head->type &&
1271: 0 == strcmp(head->string, "NAME") &&
1272: NULL != (body = body->child) &&
1273: MAN_TEXT == body->type) {
1274:
1275: assert(body->string);
1276: start = sv = body->string;
1277:
1278: /*
1279: * Go through a special heuristic dance here.
1280: * This is why -man manuals are great!
1281: * (I'm being sarcastic: my eyes are bleeding.)
1282: * Conventionally, one or more manual names are
1283: * comma-specified prior to a whitespace, then a
1284: * dash, then a description. Try to puzzle out
1285: * the name parts here.
1286: */
1287:
1288: for ( ;; ) {
1289: sz = strcspn(start, " ,");
1290: if ('\0' == start[(int)sz])
1291: break;
1292:
1293: buf->len = 0;
1294: buf_appendb(buf, start, sz);
1295: buf_appendb(buf, "", 1);
1296:
1.8 schwarze 1297: hash_put(hash, buf, TYPE_Nm);
1.1 kristaps 1298:
1299: if (' ' == start[(int)sz]) {
1300: start += (int)sz + 1;
1301: break;
1302: }
1303:
1304: assert(',' == start[(int)sz]);
1305: start += (int)sz + 1;
1306: while (' ' == *start)
1307: start++;
1308: }
1309:
1310: buf->len = 0;
1311:
1312: if (sv == start) {
1313: buf_append(buf, start);
1314: return(1);
1315: }
1316:
1317: while (' ' == *start)
1318: start++;
1319:
1320: if (0 == strncmp(start, "-", 1))
1321: start += 1;
1322: else if (0 == strncmp(start, "\\-", 2))
1323: start += 2;
1324: else if (0 == strncmp(start, "\\(en", 4))
1325: start += 4;
1326: else if (0 == strncmp(start, "\\(em", 4))
1327: start += 4;
1328:
1329: while (' ' == *start)
1330: start++;
1331:
1332: sz = strlen(start) + 1;
1333: buf_appendb(dbuf, start, sz);
1334: buf_appendb(buf, start, sz);
1335:
1.8 schwarze 1336: hash_put(hash, buf, TYPE_Nd);
1.1 kristaps 1337: }
1338: }
1339:
1.7 schwarze 1340: for (n = n->child; n; n = n->next)
1341: if (pman_node(hash, buf, dbuf, n))
1342: return(1);
1.1 kristaps 1343:
1344: return(0);
1345: }
1346:
1.14 schwarze 1347: /*
1348: * Parse a formatted manual page.
1349: * By necessity, this involves rather crude guesswork.
1350: */
1351: static void
1352: pformatted(DB *hash, struct buf *buf, struct buf *dbuf,
1353: const struct of *of)
1354: {
1355: FILE *stream;
1356: char *line, *p;
1357: size_t len, plen;
1358:
1359: if (NULL == (stream = fopen(of->fname, "r"))) {
1.38 ! schwarze 1360: if (warnings)
! 1361: perror(of->fname);
1.14 schwarze 1362: return;
1363: }
1364:
1365: /*
1366: * Always use the title derived from the filename up front,
1367: * do not even try to find it in the file. This also makes
1368: * sure we don't end up with an orphan index record, even if
1369: * the file content turns out to be completely unintelligible.
1370: */
1371:
1372: buf->len = 0;
1373: buf_append(buf, of->title);
1374: hash_put(hash, buf, TYPE_Nm);
1375:
1.31 schwarze 1376: /* Skip to first blank line. */
1.14 schwarze 1377:
1.28 kristaps 1378: while (NULL != (line = fgetln(stream, &len)))
1.31 schwarze 1379: if ('\n' == *line)
1.28 kristaps 1380: break;
1381:
1.31 schwarze 1382: /*
1383: * Assume the first line that is not indented
1384: * is the first section header. Skip to it.
1.28 kristaps 1385: */
1386:
1387: while (NULL != (line = fgetln(stream, &len)))
1.31 schwarze 1388: if ('\n' != *line && ' ' != *line)
1.28 kristaps 1389: break;
1.14 schwarze 1390:
1391: /*
1.31 schwarze 1392: * If no page content can be found, or the input line
1393: * is already the next section header, or there is no
1394: * trailing newline, reuse the page title as the page
1395: * description.
1.14 schwarze 1396: */
1397:
1.28 kristaps 1398: line = fgetln(stream, &len);
1.31 schwarze 1399: if (NULL == line || ' ' != *line || '\n' != line[(int)len - 1]) {
1.38 ! schwarze 1400: if (warnings)
! 1401: fprintf(stderr, "%s: cannot find NAME section\n",
! 1402: of->fname);
1.14 schwarze 1403: buf_appendb(dbuf, buf->cp, buf->size);
1404: hash_put(hash, buf, TYPE_Nd);
1405: fclose(stream);
1406: return;
1407: }
1408:
1.28 kristaps 1409: line[(int)--len] = '\0';
1410:
1.31 schwarze 1411: /*
1412: * Skip to the first dash.
1.28 kristaps 1413: * Use the remaining line as the description (no more than 70
1414: * bytes).
1.14 schwarze 1415: */
1416:
1.30 kristaps 1417: if (NULL != (p = strstr(line, "- "))) {
1418: for (p += 2; ' ' == *p || '\b' == *p; p++)
1.28 kristaps 1419: /* Skip to next word. */ ;
1.38 ! schwarze 1420: } else {
! 1421: if (warnings)
! 1422: fprintf(stderr, "%s: no dash in title line\n",
! 1423: of->fname);
1.14 schwarze 1424: p = line;
1.38 ! schwarze 1425: }
1.28 kristaps 1426:
1427: if ((plen = strlen(p)) > 70) {
1428: plen = 70;
1429: p[plen] = '\0';
1.29 kristaps 1430: }
1431:
1432: /* Strip backspace-encoding from line. */
1433:
1434: while (NULL != (line = memchr(p, '\b', plen))) {
1435: len = line - p;
1436: if (0 == len) {
1437: memmove(line, line + 1, plen--);
1438: continue;
1439: }
1440: memmove(line - 1, line + 1, plen - len);
1441: plen -= 2;
1.14 schwarze 1442: }
1443:
1.28 kristaps 1444: buf_appendb(dbuf, p, plen + 1);
1.14 schwarze 1445: buf->len = 0;
1.28 kristaps 1446: buf_appendb(buf, p, plen + 1);
1.14 schwarze 1447: hash_put(hash, buf, TYPE_Nd);
1.28 kristaps 1448: fclose(stream);
1.14 schwarze 1449: }
1450:
1.5 kristaps 1451: static void
1.16 schwarze 1452: ofile_argbuild(int argc, char *argv[], struct of **of)
1.5 kristaps 1453: {
1.12 schwarze 1454: char buf[MAXPATHLEN];
1455: char *sec, *arch, *title, *p;
1.14 schwarze 1456: int i, src_form;
1.5 kristaps 1457: struct of *nof;
1458:
1459: for (i = 0; i < argc; i++) {
1.12 schwarze 1460:
1461: /*
1462: * Try to infer the manual section, architecture and
1463: * page title from the path, assuming it looks like
1.14 schwarze 1464: * man*[/<arch>]/<title>.<section> or
1465: * cat<section>[/<arch>]/<title>.0
1.12 schwarze 1466: */
1467:
1468: if (strlcpy(buf, argv[i], sizeof(buf)) >= sizeof(buf)) {
1.38 ! schwarze 1469: fprintf(stderr, "%s: path too long\n", argv[i]);
1.12 schwarze 1470: continue;
1471: }
1.38 ! schwarze 1472: sec = arch = title = "";
1.14 schwarze 1473: src_form = 0;
1.12 schwarze 1474: p = strrchr(buf, '\0');
1475: while (p-- > buf) {
1.38 ! schwarze 1476: if ('\0' == *sec && '.' == *p) {
1.12 schwarze 1477: sec = p + 1;
1478: *p = '\0';
1.14 schwarze 1479: if ('0' == *sec)
1480: src_form |= MANDOC_FORM;
1481: else if ('1' <= *sec && '9' >= *sec)
1482: src_form |= MANDOC_SRC;
1.12 schwarze 1483: continue;
1484: }
1485: if ('/' != *p)
1486: continue;
1.38 ! schwarze 1487: if ('\0' == *title) {
1.12 schwarze 1488: title = p + 1;
1489: *p = '\0';
1490: continue;
1491: }
1.24 schwarze 1492: if (0 == strncmp("man", p + 1, 3))
1.14 schwarze 1493: src_form |= MANDOC_SRC;
1.24 schwarze 1494: else if (0 == strncmp("cat", p + 1, 3))
1.14 schwarze 1495: src_form |= MANDOC_FORM;
1.24 schwarze 1496: else
1.12 schwarze 1497: arch = p + 1;
1498: break;
1499: }
1.38 ! schwarze 1500: if ('\0' == *title) {
! 1501: if (warnings)
! 1502: fprintf(stderr,
! 1503: "%s: cannot deduce title "
! 1504: "from filename\n",
! 1505: argv[i]);
1.12 schwarze 1506: title = buf;
1.38 ! schwarze 1507: }
1.12 schwarze 1508:
1509: /*
1510: * Build the file structure.
1511: */
1512:
1.5 kristaps 1513: nof = mandoc_calloc(1, sizeof(struct of));
1.12 schwarze 1514: nof->fname = mandoc_strdup(argv[i]);
1.38 ! schwarze 1515: nof->sec = mandoc_strdup(sec);
! 1516: nof->arch = mandoc_strdup(arch);
1.12 schwarze 1517: nof->title = mandoc_strdup(title);
1.14 schwarze 1518: nof->src_form = src_form;
1.12 schwarze 1519:
1520: /*
1521: * Add the structure to the list.
1522: */
1523:
1.38 ! schwarze 1524: if (verb > 1)
! 1525: printf("%s: scheduling\n", argv[i]);
1.5 kristaps 1526: if (NULL == *of) {
1527: *of = nof;
1528: (*of)->first = nof;
1529: } else {
1530: nof->first = (*of)->first;
1531: (*of)->next = nof;
1532: *of = nof;
1533: }
1534: }
1535: }
1536:
1.4 kristaps 1537: /*
1538: * Recursively build up a list of files to parse.
1539: * We use this instead of ftw() and so on because I don't want global
1540: * variables hanging around.
1541: * This ignores the mandoc.db and mandoc.index files, but assumes that
1542: * everything else is a manual.
1543: * Pass in a pointer to a NULL structure for the first invocation.
1544: */
1.35 kristaps 1545: static void
1.12 schwarze 1546: ofile_dirbuild(const char *dir, const char* psec, const char *parch,
1.16 schwarze 1547: int p_src_form, struct of **of)
1.4 kristaps 1548: {
1.5 kristaps 1549: char buf[MAXPATHLEN];
1550: size_t sz;
1.4 kristaps 1551: DIR *d;
1.12 schwarze 1552: const char *fn, *sec, *arch;
1.14 schwarze 1553: char *p, *q, *suffix;
1.4 kristaps 1554: struct of *nof;
1555: struct dirent *dp;
1.14 schwarze 1556: int src_form;
1.4 kristaps 1557:
1558: if (NULL == (d = opendir(dir))) {
1.38 ! schwarze 1559: if (warnings)
! 1560: perror(dir);
! 1561: return;
1.4 kristaps 1562: }
1563:
1564: while (NULL != (dp = readdir(d))) {
1565: fn = dp->d_name;
1.12 schwarze 1566:
1567: if ('.' == *fn)
1568: continue;
1569:
1.14 schwarze 1570: src_form = p_src_form;
1571:
1.4 kristaps 1572: if (DT_DIR == dp->d_type) {
1.12 schwarze 1573: sec = psec;
1574: arch = parch;
1575:
1576: /*
1577: * By default, only use directories called:
1.14 schwarze 1578: * man<section>/[<arch>/] or
1579: * cat<section>/[<arch>/]
1.12 schwarze 1580: */
1581:
1.38 ! schwarze 1582: if ('\0' == *sec) {
1.14 schwarze 1583: if(0 == strncmp("man", fn, 3)) {
1584: src_form |= MANDOC_SRC;
1.12 schwarze 1585: sec = fn + 3;
1.14 schwarze 1586: } else if (0 == strncmp("cat", fn, 3)) {
1587: src_form |= MANDOC_FORM;
1588: sec = fn + 3;
1.38 ! schwarze 1589: } else {
! 1590: if (warnings) fprintf(stderr,
! 1591: "%s/%s: bad section\n",
! 1592: dir, fn);
! 1593: if (use_all)
! 1594: sec = fn;
! 1595: else
! 1596: continue;
! 1597: }
! 1598: } else if ('\0' == *arch) {
! 1599: if (NULL != strchr(fn, '.')) {
! 1600: if (warnings) fprintf(stderr,
! 1601: "%s/%s: bad architecture\n",
! 1602: dir, fn);
! 1603: if (0 == use_all)
! 1604: continue;
! 1605: }
! 1606: arch = fn;
! 1607: } else {
! 1608: if (warnings) fprintf(stderr, "%s/%s: "
! 1609: "excessive subdirectory\n", dir, fn);
! 1610: if (0 == use_all)
1.12 schwarze 1611: continue;
1.38 ! schwarze 1612: }
1.5 kristaps 1613:
1614: buf[0] = '\0';
1615: strlcat(buf, dir, MAXPATHLEN);
1616: strlcat(buf, "/", MAXPATHLEN);
1617: sz = strlcat(buf, fn, MAXPATHLEN);
1618:
1.12 schwarze 1619: if (MAXPATHLEN <= sz) {
1.38 ! schwarze 1620: if (warnings) fprintf(stderr, "%s/%s: "
! 1621: "path too long\n", dir, fn);
! 1622: continue;
1.12 schwarze 1623: }
1.38 ! schwarze 1624:
! 1625: if (verb > 1)
! 1626: printf("%s: scanning\n", buf);
! 1627:
1.35 kristaps 1628: ofile_dirbuild(buf, sec, arch, src_form, of);
1.38 ! schwarze 1629: continue;
1.35 kristaps 1630: }
1.12 schwarze 1631:
1.38 ! schwarze 1632: if (DT_REG != dp->d_type) {
! 1633: if (warnings)
! 1634: fprintf(stderr,
! 1635: "%s/%s: not a regular file\n",
! 1636: dir, fn);
! 1637: continue;
! 1638: }
! 1639: if (!strcmp(MANDOC_DB, fn) || !strcmp(MANDOC_IDX, fn))
1.12 schwarze 1640: continue;
1.38 ! schwarze 1641: if ('\0' == *psec) {
! 1642: if (warnings)
! 1643: fprintf(stderr,
! 1644: "%s/%s: file outside section\n",
! 1645: dir, fn);
! 1646: if (0 == use_all)
! 1647: continue;
! 1648: }
1.12 schwarze 1649:
1650: /*
1651: * By default, skip files where the file name suffix
1652: * does not agree with the section directory
1653: * they are located in.
1654: */
1655:
1656: suffix = strrchr(fn, '.');
1.38 ! schwarze 1657: if (NULL == suffix) {
! 1658: if (warnings)
! 1659: fprintf(stderr,
! 1660: "%s/%s: no filename suffix\n",
! 1661: dir, fn);
! 1662: if (0 == use_all)
1.5 kristaps 1663: continue;
1.38 ! schwarze 1664: } else if ((MANDOC_SRC & src_form &&
! 1665: strcmp(suffix + 1, psec)) ||
1.14 schwarze 1666: (MANDOC_FORM & src_form &&
1.38 ! schwarze 1667: strcmp(suffix + 1, "0"))) {
! 1668: if (warnings)
! 1669: fprintf(stderr,
! 1670: "%s/%s: wrong filename suffix\n",
! 1671: dir, fn);
! 1672: if (0 == use_all)
! 1673: continue;
1.14 schwarze 1674: if ('0' == suffix[1])
1675: src_form |= MANDOC_FORM;
1676: else if ('1' <= suffix[1] && '9' >= suffix[1])
1677: src_form |= MANDOC_SRC;
1678: }
1679:
1680: /*
1681: * Skip formatted manuals if a source version is
1682: * available. Ignore the age: it is very unlikely
1683: * that people install newer formatted base manuals
1684: * when they used to have source manuals before,
1685: * and in ports, old manuals get removed on update.
1686: */
1687: if (0 == use_all && MANDOC_FORM & src_form &&
1.38 ! schwarze 1688: '\0' != *psec) {
1.14 schwarze 1689: buf[0] = '\0';
1690: strlcat(buf, dir, MAXPATHLEN);
1691: p = strrchr(buf, '/');
1.38 ! schwarze 1692: if ('\0' != *parch && NULL != p)
1.32 schwarze 1693: for (p--; p > buf; p--)
1694: if ('/' == *p)
1695: break;
1.14 schwarze 1696: if (NULL == p)
1697: p = buf;
1698: else
1699: p++;
1700: if (0 == strncmp("cat", p, 3))
1701: memcpy(p, "man", 3);
1702: strlcat(buf, "/", MAXPATHLEN);
1703: sz = strlcat(buf, fn, MAXPATHLEN);
1704: if (sz >= MAXPATHLEN) {
1.38 ! schwarze 1705: if (warnings) fprintf(stderr,
! 1706: "%s/%s: path too long\n",
! 1707: dir, fn);
1.5 kristaps 1708: continue;
1.14 schwarze 1709: }
1710: q = strrchr(buf, '.');
1711: if (NULL != q && p < q++) {
1712: *q = '\0';
1713: sz = strlcat(buf, psec, MAXPATHLEN);
1714: if (sz >= MAXPATHLEN) {
1.38 ! schwarze 1715: if (warnings) fprintf(stderr,
! 1716: "%s/%s: path too long\n",
! 1717: dir, fn);
1.14 schwarze 1718: continue;
1719: }
1.35 kristaps 1720: if (0 == access(buf, R_OK))
1.14 schwarze 1721: continue;
1722: }
1.5 kristaps 1723: }
1.4 kristaps 1724:
1.38 ! schwarze 1725: buf[0] = '\0';
1.35 kristaps 1726: assert('.' == dir[0]);
1.38 ! schwarze 1727: if ('/' == dir[1]) {
! 1728: strlcat(buf, dir + 2, MAXPATHLEN);
! 1729: strlcat(buf, "/", MAXPATHLEN);
! 1730: }
1.6 schwarze 1731: sz = strlcat(buf, fn, MAXPATHLEN);
1.5 kristaps 1732: if (sz >= MAXPATHLEN) {
1.38 ! schwarze 1733: if (warnings) fprintf(stderr,
! 1734: "%s/%s: path too long\n", dir, fn);
1.14 schwarze 1735: continue;
1.5 kristaps 1736: }
1737:
1.4 kristaps 1738: nof = mandoc_calloc(1, sizeof(struct of));
1.5 kristaps 1739: nof->fname = mandoc_strdup(buf);
1.38 ! schwarze 1740: nof->sec = mandoc_strdup(psec);
! 1741: nof->arch = mandoc_strdup(parch);
1.14 schwarze 1742: nof->src_form = src_form;
1.12 schwarze 1743:
1744: /*
1745: * Remember the file name without the extension,
1746: * to be used as the page title in the database.
1747: */
1748:
1749: if (NULL != suffix)
1750: *suffix = '\0';
1751: nof->title = mandoc_strdup(fn);
1.5 kristaps 1752:
1.14 schwarze 1753: /*
1754: * Add the structure to the list.
1755: */
1756:
1.38 ! schwarze 1757: if (verb > 1)
! 1758: printf("%s: scheduling\n", buf);
1.4 kristaps 1759: if (NULL == *of) {
1760: *of = nof;
1761: (*of)->first = nof;
1762: } else {
1.5 kristaps 1763: nof->first = (*of)->first;
1.4 kristaps 1764: (*of)->next = nof;
1765: *of = nof;
1766: }
1767: }
1768:
1.7 schwarze 1769: closedir(d);
1.4 kristaps 1770: }
1771:
1772: static void
1773: ofile_free(struct of *of)
1774: {
1775: struct of *nof;
1776:
1777: while (of) {
1778: nof = of->next;
1779: free(of->fname);
1.12 schwarze 1780: free(of->sec);
1781: free(of->arch);
1782: free(of->title);
1.4 kristaps 1783: free(of);
1784: of = nof;
1785: }
1.1 kristaps 1786: }
CVSweb