Annotation of mandoc/mandocdb.c, Revision 1.33
1.33 ! schwarze 1: /* $Id: mandocdb.c,v 1.32 2011/12/10 16:53:39 schwarze Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.12 schwarze 4: * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
1.1 kristaps 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18: #ifdef HAVE_CONFIG_H
19: #include "config.h"
20: #endif
21:
22: #include <sys/param.h>
1.14 schwarze 23: #include <sys/types.h>
24: #include <sys/stat.h>
1.1 kristaps 25:
26: #include <assert.h>
1.4 kristaps 27: #include <dirent.h>
1.1 kristaps 28: #include <fcntl.h>
29: #include <getopt.h>
30: #include <stdio.h>
31: #include <stdint.h>
32: #include <stdlib.h>
33: #include <string.h>
1.17 schwarze 34: #include <unistd.h>
1.1 kristaps 35:
1.21 kristaps 36: #if defined(__linux__)
37: # include <endian.h>
1.1 kristaps 38: # include <db_185.h>
1.21 kristaps 39: #elif defined(__APPLE__)
40: # include <libkern/OSByteOrder.h>
41: # include <db.h>
1.1 kristaps 42: #else
43: # include <db.h>
44: #endif
45:
46: #include "man.h"
47: #include "mdoc.h"
48: #include "mandoc.h"
1.8 schwarze 49: #include "mandocdb.h"
1.10 kristaps 50: #include "manpath.h"
1.1 kristaps 51:
/* Initial size of a struct buf and the slack added whenever one grows. */
#define	MANDOC_BUFSZ	  BUFSIZ
/* Number of slots by which the array of empty record numbers grows. */
#define	MANDOC_SLOP	  1024

/*
 * Bits tested in struct of.src_form.  A file is run through the
 * source parser when MANDOC_SRC is set or MANDOC_FORM is clear.
 */
#define	MANDOC_SRC	  0x1
#define	MANDOC_FORM	  0x2
57:
/*
 * Minimal singly linked list of files to process.
 * Deliberately hand-rolled: no need to bring in QUEUE.
 */

struct	of {
	char		 *fname; /* file name (heap-allocated) */
	char		 *sec; /* section, compared with the page's own */
	char		 *arch; /* architecture or NULL, compared likewise */
	char		 *title; /* title, compared with the page's own */
	int		  src_form; /* MANDOC_SRC and/or MANDOC_FORM bits */
	struct of	 *next; /* following entry; NULL for the last one */
	struct of	 *first; /* head of the list this entry is on */
};
69:
/* Growable byte buffer; see buf_appendb() and buf_append(). */

struct	buf {
	char		 *cp; /* storage */
	size_t		  len; /* number of bytes currently in use */
	size_t		  size; /* number of bytes allocated */
};
77:
/* The operation selected on the command line. */

enum	op {
	OP_NEW = 0, /* create (truncate) the databases */
	OP_UPDATE, /* prune, then re-add entries in existing databases */
	OP_DELETE /* only prune entries from existing databases */
};
85:
/*
 * Parameter lists shared by the per-node parsers:
 * the pending keyword hash, the keyword buffer, the description
 * buffer and the node to look at (plus the page meta data for mdoc).
 */
#define	MAN_ARGS	  DB *hash, \
			  struct buf *buf, \
			  struct buf *dbuf, \
			  const struct man_node *n
#define	MDOC_ARGS	  DB *hash, \
			  struct buf *buf, \
			  struct buf *dbuf, \
			  const struct mdoc_node *n, \
			  const struct mdoc_meta *m
95:
96: static void buf_appendmdoc(struct buf *,
97: const struct mdoc_node *, int);
98: static void buf_append(struct buf *, const char *);
99: static void buf_appendb(struct buf *,
100: const void *, size_t);
101: static void dbt_put(DB *, const char *, DBT *, DBT *);
1.9 kristaps 102: static void hash_put(DB *, const struct buf *, uint64_t);
1.1 kristaps 103: static void hash_reset(DB **);
1.3 kristaps 104: static void index_merge(const struct of *, struct mparse *,
1.16 schwarze 105: struct buf *, struct buf *, DB *,
106: DB *, const char *, DB *, const char *,
1.3 kristaps 107: recno_t, const recno_t *, size_t);
108: static void index_prune(const struct of *, DB *,
109: const char *, DB *, const char *,
1.27 schwarze 110: recno_t *, recno_t **, size_t *,
111: size_t *);
1.16 schwarze 112: static void ofile_argbuild(int, char *[], struct of **);
1.12 schwarze 113: static int ofile_dirbuild(const char *, const char *,
1.16 schwarze 114: const char *, int, struct of **);
1.4 kristaps 115: static void ofile_free(struct of *);
1.14 schwarze 116: static void pformatted(DB *, struct buf *, struct buf *,
117: const struct of *);
1.1 kristaps 118: static int pman_node(MAN_ARGS);
119: static void pmdoc_node(MDOC_ARGS);
1.25 schwarze 120: static int pmdoc_head(MDOC_ARGS);
121: static int pmdoc_body(MDOC_ARGS);
122: static int pmdoc_Fd(MDOC_ARGS);
123: static int pmdoc_In(MDOC_ARGS);
124: static int pmdoc_Fn(MDOC_ARGS);
125: static int pmdoc_Nd(MDOC_ARGS);
126: static int pmdoc_Nm(MDOC_ARGS);
127: static int pmdoc_Sh(MDOC_ARGS);
128: static int pmdoc_St(MDOC_ARGS);
129: static int pmdoc_Xr(MDOC_ARGS);
1.1 kristaps 130: static void usage(void);
131:
/* Handler flag: index the text of all child nodes automatically. */
#define	MDOCF_CHILD	  0x01

/*
 * One entry per mdoc macro, describing how pmdoc_node() indexes it.
 */
struct	mdoc_handler {
	/* Optional callback; a zero return makes pmdoc_node() skip. */
	int		(*fp)(MDOC_ARGS);
	/* Keyword type bits; set unless the handler returns 0. */
	uint64_t	  mask;
	/* MDOCF_* bits, for use by pmdoc_node(). */
	int		  flags;
};
139:
140: static const struct mdoc_handler mdocs[MDOC_MAX] = {
141: { NULL, 0, 0 }, /* Ap */
142: { NULL, 0, 0 }, /* Dd */
143: { NULL, 0, 0 }, /* Dt */
144: { NULL, 0, 0 }, /* Os */
145: { pmdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */
146: { pmdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */
147: { NULL, 0, 0 }, /* Pp */
148: { NULL, 0, 0 }, /* D1 */
149: { NULL, 0, 0 }, /* Dl */
150: { NULL, 0, 0 }, /* Bd */
151: { NULL, 0, 0 }, /* Ed */
152: { NULL, 0, 0 }, /* Bl */
153: { NULL, 0, 0 }, /* El */
154: { NULL, 0, 0 }, /* It */
155: { NULL, 0, 0 }, /* Ad */
156: { NULL, TYPE_An, MDOCF_CHILD }, /* An */
157: { NULL, TYPE_Ar, MDOCF_CHILD }, /* Ar */
158: { NULL, TYPE_Cd, MDOCF_CHILD }, /* Cd */
159: { NULL, TYPE_Cm, MDOCF_CHILD }, /* Cm */
160: { NULL, TYPE_Dv, MDOCF_CHILD }, /* Dv */
161: { NULL, TYPE_Er, MDOCF_CHILD }, /* Er */
162: { NULL, TYPE_Ev, MDOCF_CHILD }, /* Ev */
163: { NULL, 0, 0 }, /* Ex */
164: { NULL, TYPE_Fa, MDOCF_CHILD }, /* Fa */
165: { pmdoc_Fd, TYPE_In, 0 }, /* Fd */
166: { NULL, TYPE_Fl, MDOCF_CHILD }, /* Fl */
167: { pmdoc_Fn, 0, 0 }, /* Fn */
168: { NULL, TYPE_Ft, MDOCF_CHILD }, /* Ft */
169: { NULL, TYPE_Ic, MDOCF_CHILD }, /* Ic */
170: { pmdoc_In, TYPE_In, 0 }, /* In */
171: { NULL, TYPE_Li, MDOCF_CHILD }, /* Li */
172: { pmdoc_Nd, TYPE_Nd, MDOCF_CHILD }, /* Nd */
173: { pmdoc_Nm, TYPE_Nm, MDOCF_CHILD }, /* Nm */
174: { NULL, 0, 0 }, /* Op */
175: { NULL, 0, 0 }, /* Ot */
176: { NULL, TYPE_Pa, MDOCF_CHILD }, /* Pa */
177: { NULL, 0, 0 }, /* Rv */
178: { pmdoc_St, TYPE_St, 0 }, /* St */
179: { NULL, TYPE_Va, MDOCF_CHILD }, /* Va */
180: { pmdoc_body, TYPE_Va, MDOCF_CHILD }, /* Vt */
181: { pmdoc_Xr, TYPE_Xr, 0 }, /* Xr */
182: { NULL, 0, 0 }, /* %A */
183: { NULL, 0, 0 }, /* %B */
184: { NULL, 0, 0 }, /* %D */
185: { NULL, 0, 0 }, /* %I */
186: { NULL, 0, 0 }, /* %J */
187: { NULL, 0, 0 }, /* %N */
188: { NULL, 0, 0 }, /* %O */
189: { NULL, 0, 0 }, /* %P */
190: { NULL, 0, 0 }, /* %R */
191: { NULL, 0, 0 }, /* %T */
192: { NULL, 0, 0 }, /* %V */
193: { NULL, 0, 0 }, /* Ac */
194: { NULL, 0, 0 }, /* Ao */
195: { NULL, 0, 0 }, /* Aq */
196: { NULL, TYPE_At, MDOCF_CHILD }, /* At */
197: { NULL, 0, 0 }, /* Bc */
198: { NULL, 0, 0 }, /* Bf */
199: { NULL, 0, 0 }, /* Bo */
200: { NULL, 0, 0 }, /* Bq */
201: { NULL, TYPE_Bsx, MDOCF_CHILD }, /* Bsx */
202: { NULL, TYPE_Bx, MDOCF_CHILD }, /* Bx */
203: { NULL, 0, 0 }, /* Db */
204: { NULL, 0, 0 }, /* Dc */
205: { NULL, 0, 0 }, /* Do */
206: { NULL, 0, 0 }, /* Dq */
207: { NULL, 0, 0 }, /* Ec */
208: { NULL, 0, 0 }, /* Ef */
209: { NULL, TYPE_Em, MDOCF_CHILD }, /* Em */
210: { NULL, 0, 0 }, /* Eo */
211: { NULL, TYPE_Fx, MDOCF_CHILD }, /* Fx */
212: { NULL, TYPE_Ms, MDOCF_CHILD }, /* Ms */
213: { NULL, 0, 0 }, /* No */
214: { NULL, 0, 0 }, /* Ns */
215: { NULL, TYPE_Nx, MDOCF_CHILD }, /* Nx */
216: { NULL, TYPE_Ox, MDOCF_CHILD }, /* Ox */
217: { NULL, 0, 0 }, /* Pc */
218: { NULL, 0, 0 }, /* Pf */
219: { NULL, 0, 0 }, /* Po */
220: { NULL, 0, 0 }, /* Pq */
221: { NULL, 0, 0 }, /* Qc */
222: { NULL, 0, 0 }, /* Ql */
223: { NULL, 0, 0 }, /* Qo */
224: { NULL, 0, 0 }, /* Qq */
225: { NULL, 0, 0 }, /* Re */
226: { NULL, 0, 0 }, /* Rs */
227: { NULL, 0, 0 }, /* Sc */
228: { NULL, 0, 0 }, /* So */
229: { NULL, 0, 0 }, /* Sq */
230: { NULL, 0, 0 }, /* Sm */
231: { NULL, 0, 0 }, /* Sx */
232: { NULL, TYPE_Sy, MDOCF_CHILD }, /* Sy */
233: { NULL, TYPE_Tn, MDOCF_CHILD }, /* Tn */
234: { NULL, 0, 0 }, /* Ux */
235: { NULL, 0, 0 }, /* Xc */
236: { NULL, 0, 0 }, /* Xo */
237: { pmdoc_head, TYPE_Fn, 0 }, /* Fo */
238: { NULL, 0, 0 }, /* Fc */
239: { NULL, 0, 0 }, /* Oo */
240: { NULL, 0, 0 }, /* Oc */
241: { NULL, 0, 0 }, /* Bk */
242: { NULL, 0, 0 }, /* Ek */
243: { NULL, 0, 0 }, /* Bt */
244: { NULL, 0, 0 }, /* Hf */
245: { NULL, 0, 0 }, /* Fr */
246: { NULL, 0, 0 }, /* Ud */
247: { NULL, TYPE_Lb, MDOCF_CHILD }, /* Lb */
248: { NULL, 0, 0 }, /* Lp */
249: { NULL, TYPE_Lk, MDOCF_CHILD }, /* Lk */
250: { NULL, TYPE_Mt, MDOCF_CHILD }, /* Mt */
251: { NULL, 0, 0 }, /* Brq */
252: { NULL, 0, 0 }, /* Bro */
253: { NULL, 0, 0 }, /* Brc */
254: { NULL, 0, 0 }, /* %C */
255: { NULL, 0, 0 }, /* Es */
256: { NULL, 0, 0 }, /* En */
257: { NULL, TYPE_Dx, MDOCF_CHILD }, /* Dx */
258: { NULL, 0, 0 }, /* %Q */
259: { NULL, 0, 0 }, /* br */
260: { NULL, 0, 0 }, /* sp */
261: { NULL, 0, 0 }, /* %U */
262: { NULL, 0, 0 }, /* Ta */
1.1 kristaps 263: };
264:
/* Basename of argv[0]; set at the start of main(). */
static	const char	 *progname;
/* Non-zero (-a): use all directories and files. */
static	int		  use_all;
/* Output verbosity level; incremented once per -v. */
static	int		  verb;
1.1 kristaps 268:
269: int
270: main(int argc, char *argv[])
271: {
272: struct mparse *mp; /* parse sequence */
1.10 kristaps 273: struct manpaths dirs;
1.1 kristaps 274: enum op op; /* current operation */
1.5 kristaps 275: const char *dir;
1.26 kristaps 276: char *cp;
277: char pbuf[PATH_MAX],
278: ibuf[MAXPATHLEN], /* index fname */
1.3 kristaps 279: fbuf[MAXPATHLEN]; /* btree fname */
1.16 schwarze 280: int ch, i, flags;
1.1 kristaps 281: DB *idx, /* index database */
282: *db, /* keyword database */
283: *hash; /* temporary keyword hashtable */
284: BTREEINFO info; /* btree configuration */
1.12 schwarze 285: recno_t maxrec; /* last record number in the index */
286: recno_t *recs; /* the numbers of all empty records */
1.5 kristaps 287: size_t sz1, sz2,
1.12 schwarze 288: recsz, /* number of allocated slots in recs */
289: reccur; /* current number of empty records */
1.1 kristaps 290: struct buf buf, /* keyword buffer */
291: dbuf; /* description buffer */
1.5 kristaps 292: struct of *of; /* list of files for processing */
1.1 kristaps 293: extern int optind;
294: extern char *optarg;
295:
296: progname = strrchr(argv[0], '/');
297: if (progname == NULL)
298: progname = argv[0];
299: else
300: ++progname;
301:
1.10 kristaps 302: memset(&dirs, 0, sizeof(struct manpaths));
303:
1.5 kristaps 304: verb = 0;
1.12 schwarze 305: use_all = 0;
1.4 kristaps 306: of = NULL;
1.1 kristaps 307: db = idx = NULL;
308: mp = NULL;
309: hash = NULL;
310: recs = NULL;
311: recsz = reccur = 0;
312: maxrec = 0;
313: op = OP_NEW;
1.5 kristaps 314: dir = NULL;
1.1 kristaps 315:
1.12 schwarze 316: while (-1 != (ch = getopt(argc, argv, "ad:u:v")))
1.1 kristaps 317: switch (ch) {
1.12 schwarze 318: case ('a'):
319: use_all = 1;
320: break;
1.5 kristaps 321: case ('d'):
322: dir = optarg;
323: op = OP_UPDATE;
324: break;
325: case ('u'):
326: dir = optarg;
327: op = OP_DELETE;
328: break;
329: case ('v'):
330: verb++;
331: break;
1.1 kristaps 332: default:
333: usage();
334: return((int)MANDOCLEVEL_BADARG);
335: }
336:
337: argc -= optind;
338: argv += optind;
339:
1.4 kristaps 340: memset(&info, 0, sizeof(BTREEINFO));
341: info.flags = R_DUP;
1.1 kristaps 342:
1.4 kristaps 343: mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
1.1 kristaps 344:
1.5 kristaps 345: memset(&buf, 0, sizeof(struct buf));
346: memset(&dbuf, 0, sizeof(struct buf));
1.1 kristaps 347:
1.4 kristaps 348: buf.size = dbuf.size = MANDOC_BUFSZ;
1.1 kristaps 349:
1.4 kristaps 350: buf.cp = mandoc_malloc(buf.size);
351: dbuf.cp = mandoc_malloc(dbuf.size);
1.1 kristaps 352:
1.5 kristaps 353: flags = OP_NEW == op ? O_CREAT|O_TRUNC|O_RDWR : O_CREAT|O_RDWR;
354:
355: if (OP_UPDATE == op || OP_DELETE == op) {
356: ibuf[0] = fbuf[0] = '\0';
357:
358: strlcat(fbuf, dir, MAXPATHLEN);
359: strlcat(fbuf, "/", MAXPATHLEN);
360: sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
361:
362: strlcat(ibuf, dir, MAXPATHLEN);
363: strlcat(ibuf, "/", MAXPATHLEN);
364: sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
365:
366: if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
367: fprintf(stderr, "%s: Path too long\n", dir);
368: exit((int)MANDOCLEVEL_BADARG);
369: }
370:
371: db = dbopen(fbuf, flags, 0644, DB_BTREE, &info);
372: idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL);
373:
374: if (NULL == db) {
375: perror(fbuf);
376: exit((int)MANDOCLEVEL_SYSERR);
1.12 schwarze 377: } else if (NULL == idx) {
1.5 kristaps 378: perror(ibuf);
379: exit((int)MANDOCLEVEL_SYSERR);
380: }
381:
382: if (verb > 2) {
383: printf("%s: Opened\n", fbuf);
384: printf("%s: Opened\n", ibuf);
385: }
386:
1.16 schwarze 387: ofile_argbuild(argc, argv, &of);
1.5 kristaps 388: if (NULL == of)
389: goto out;
390:
391: of = of->first;
392:
1.16 schwarze 393: index_prune(of, db, fbuf, idx, ibuf,
1.27 schwarze 394: &maxrec, &recs, &recsz, &reccur);
1.5 kristaps 395:
1.17 schwarze 396: /*
397: * Go to the root of the respective manual tree
398: * such that .so links work. In case of failure,
399: * just prod on, even though .so links won't work.
400: */
401:
402: if (OP_UPDATE == op) {
403: chdir(dir);
1.13 schwarze 404: index_merge(of, mp, &dbuf, &buf, hash,
1.16 schwarze 405: db, fbuf, idx, ibuf,
406: maxrec, recs, reccur);
1.17 schwarze 407: }
1.5 kristaps 408:
409: goto out;
410: }
411:
1.10 kristaps 412: /*
413: * Configure the directories we're going to scan.
414: * If we have command-line arguments, use them.
415: * If not, we use man(1)'s method (see mandocdb.8).
416: */
417:
418: if (argc > 0) {
1.26 kristaps 419: dirs.paths = mandoc_calloc(argc, sizeof(char *));
1.10 kristaps 420: dirs.sz = argc;
1.26 kristaps 421: for (i = 0; i < argc; i++) {
422: if (NULL == (cp = realpath(argv[i], pbuf))) {
423: perror(argv[i]);
424: goto out;
425: }
426: dirs.paths[i] = mandoc_strdup(cp);
427: }
1.10 kristaps 428: } else
1.11 kristaps 429: manpath_parse(&dirs, NULL, NULL);
1.10 kristaps 430:
431: for (i = 0; i < dirs.sz; i++) {
1.5 kristaps 432: ibuf[0] = fbuf[0] = '\0';
1.1 kristaps 433:
1.10 kristaps 434: strlcat(fbuf, dirs.paths[i], MAXPATHLEN);
1.5 kristaps 435: strlcat(fbuf, "/", MAXPATHLEN);
436: sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
1.1 kristaps 437:
1.10 kristaps 438: strlcat(ibuf, dirs.paths[i], MAXPATHLEN);
1.5 kristaps 439: strlcat(ibuf, "/", MAXPATHLEN);
440: sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
1.1 kristaps 441:
1.5 kristaps 442: if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
1.13 schwarze 443: fprintf(stderr, "%s: Path too long\n",
1.10 kristaps 444: dirs.paths[i]);
1.5 kristaps 445: exit((int)MANDOCLEVEL_BADARG);
1.4 kristaps 446: }
1.3 kristaps 447:
1.13 schwarze 448: if (db)
449: (*db->close)(db);
450: if (idx)
451: (*idx->close)(idx);
452:
1.4 kristaps 453: db = dbopen(fbuf, flags, 0644, DB_BTREE, &info);
454: idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL);
1.3 kristaps 455:
1.4 kristaps 456: if (NULL == db) {
457: perror(fbuf);
1.5 kristaps 458: exit((int)MANDOCLEVEL_SYSERR);
1.12 schwarze 459: } else if (NULL == idx) {
1.4 kristaps 460: perror(ibuf);
1.5 kristaps 461: exit((int)MANDOCLEVEL_SYSERR);
462: }
463:
464: if (verb > 2) {
465: printf("%s: Truncated\n", fbuf);
466: printf("%s: Truncated\n", ibuf);
1.4 kristaps 467: }
1.1 kristaps 468:
1.4 kristaps 469: ofile_free(of);
470: of = NULL;
1.1 kristaps 471:
1.12 schwarze 472: if ( ! ofile_dirbuild(dirs.paths[i], NULL, NULL,
1.16 schwarze 473: 0, &of))
1.5 kristaps 474: exit((int)MANDOCLEVEL_SYSERR);
1.1 kristaps 475:
1.5 kristaps 476: if (NULL == of)
477: continue;
1.1 kristaps 478:
1.5 kristaps 479: of = of->first;
1.1 kristaps 480:
1.17 schwarze 481: /*
482: * Go to the root of the respective manual tree
483: * such that .so links work. In case of failure,
484: * just prod on, even though .so links won't work.
485: */
486:
487: chdir(dirs.paths[i]);
1.13 schwarze 488: index_merge(of, mp, &dbuf, &buf, hash, db, fbuf,
1.16 schwarze 489: idx, ibuf, maxrec, recs, reccur);
1.4 kristaps 490: }
1.3 kristaps 491:
1.5 kristaps 492: out:
1.3 kristaps 493: if (db)
494: (*db->close)(db);
495: if (idx)
496: (*idx->close)(idx);
497: if (hash)
498: (*hash->close)(hash);
499: if (mp)
500: mparse_free(mp);
501:
1.10 kristaps 502: manpath_free(&dirs);
1.4 kristaps 503: ofile_free(of);
1.3 kristaps 504: free(buf.cp);
505: free(dbuf.cp);
506: free(recs);
507:
1.5 kristaps 508: return(MANDOCLEVEL_OK);
1.3 kristaps 509: }
510:
511: void
512: index_merge(const struct of *of, struct mparse *mp,
1.16 schwarze 513: struct buf *dbuf, struct buf *buf, DB *hash,
514: DB *db, const char *dbf, DB *idx, const char *idxf,
1.3 kristaps 515: recno_t maxrec, const recno_t *recs, size_t reccur)
516: {
517: recno_t rec;
518: int ch;
519: DBT key, val;
520: struct mdoc *mdoc;
521: struct man *man;
522: const char *fn, *msec, *mtitle, *arch;
523: size_t sv;
524: unsigned seq;
1.9 kristaps 525: struct db_val vbuf;
1.3 kristaps 526:
527: for (rec = 0; of; of = of->next) {
528: fn = of->fname;
1.14 schwarze 529:
530: /*
1.33 ! schwarze 531: * Try interpreting the file as mdoc(7) or man(7)
! 532: * source code, unless it is already known to be
! 533: * formatted. Fall back to formatted mode.
1.14 schwarze 534: */
535:
1.1 kristaps 536: mparse_reset(mp);
1.14 schwarze 537: mdoc = NULL;
538: man = NULL;
1.1 kristaps 539:
1.14 schwarze 540: if ((MANDOC_SRC & of->src_form ||
541: ! (MANDOC_FORM & of->src_form)) &&
542: MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn))
543: mparse_result(mp, &mdoc, &man);
544:
545: if (NULL != mdoc) {
546: msec = mdoc_meta(mdoc)->msec;
547: arch = mdoc_meta(mdoc)->arch;
548: mtitle = mdoc_meta(mdoc)->title;
549: } else if (NULL != man) {
550: msec = man_meta(man)->msec;
551: arch = NULL;
552: mtitle = man_meta(man)->title;
553: } else {
554: msec = of->sec;
555: arch = of->arch;
556: mtitle = of->title;
1.1 kristaps 557: }
558:
1.12 schwarze 559: /*
560: * By default, skip a file if the manual section
561: * and architecture given in the file disagree
562: * with the directory where the file is located.
563: */
564:
565: if (0 == use_all) {
566: assert(of->sec);
567: assert(msec);
1.32 schwarze 568: if (strcasecmp(msec, of->sec))
1.12 schwarze 569: continue;
570:
571: if (NULL == arch) {
572: if (NULL != of->arch)
573: continue;
574: } else if (NULL == of->arch ||
1.32 schwarze 575: strcasecmp(arch, of->arch))
1.12 schwarze 576: continue;
577: }
578:
1.1 kristaps 579: if (NULL == arch)
580: arch = "";
581:
582: /*
1.12 schwarze 583: * By default, skip a file if the title given
584: * in the file disagrees with the file name.
585: * If both agree, use the file name as the title,
586: * because the one in the file usually is all caps.
587: */
588:
589: assert(of->title);
590: assert(mtitle);
591:
592: if (0 == strcasecmp(mtitle, of->title))
593: mtitle = of->title;
594: else if (0 == use_all)
595: continue;
596:
597: /*
1.1 kristaps 598: * The index record value consists of a nil-terminated
599: * filename, a nil-terminated manual section, and a
600: * nil-terminated description. Since the description
601: * may not be set, we set a sentinel to see if we're
602: * going to write a nil byte in its place.
603: */
604:
1.3 kristaps 605: dbuf->len = 0;
1.15 schwarze 606: buf_append(dbuf, mdoc ? "mdoc" : (man ? "man" : "cat"));
1.3 kristaps 607: buf_appendb(dbuf, fn, strlen(fn) + 1);
608: buf_appendb(dbuf, msec, strlen(msec) + 1);
609: buf_appendb(dbuf, mtitle, strlen(mtitle) + 1);
610: buf_appendb(dbuf, arch, strlen(arch) + 1);
1.1 kristaps 611:
1.3 kristaps 612: sv = dbuf->len;
1.1 kristaps 613:
1.33 ! schwarze 614: /*
! 615: * Collect keyword/mask pairs.
! 616: * Each pair will become a new btree node.
! 617: */
1.1 kristaps 618:
1.33 ! schwarze 619: hash_reset(&hash);
1.1 kristaps 620: if (mdoc)
1.3 kristaps 621: pmdoc_node(hash, buf, dbuf,
1.1 kristaps 622: mdoc_node(mdoc), mdoc_meta(mdoc));
1.14 schwarze 623: else if (man)
1.3 kristaps 624: pman_node(hash, buf, dbuf, man_node(man));
1.14 schwarze 625: else
626: pformatted(hash, buf, dbuf, of);
1.1 kristaps 627:
628: /*
1.33 ! schwarze 629: * Reclaim an empty index record, if available.
! 630: * Use its record number for all new btree nodes.
1.1 kristaps 631: */
632:
1.33 ! schwarze 633: if (reccur > 0) {
! 634: --reccur;
! 635: rec = recs[(int)reccur];
! 636: } else if (maxrec > 0) {
! 637: rec = maxrec;
! 638: maxrec = 0;
! 639: } else
! 640: rec++;
1.20 kristaps 641: vbuf.rec = htobe32(rec);
1.33 ! schwarze 642:
! 643: /*
! 644: * Copy from the in-memory hashtable of pending
! 645: * keyword/mask pairs into the database.
! 646: */
! 647:
1.1 kristaps 648: seq = R_FIRST;
649: while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
650: seq = R_NEXT;
1.20 kristaps 651: vbuf.mask = htobe64(*(uint64_t *)val.data);
1.9 kristaps 652: val.size = sizeof(struct db_val);
653: val.data = &vbuf;
1.3 kristaps 654: dbt_put(db, dbf, &key, &val);
1.1 kristaps 655: }
656: if (ch < 0) {
657: perror("hash");
658: exit((int)MANDOCLEVEL_SYSERR);
659: }
660:
661: /*
662: * Apply to the index. If we haven't had a description
663: * set, put an empty one in now.
664: */
665:
1.3 kristaps 666: if (dbuf->len == sv)
667: buf_appendb(dbuf, "", 1);
1.1 kristaps 668:
669: key.data = &rec;
670: key.size = sizeof(recno_t);
671:
1.3 kristaps 672: val.data = dbuf->cp;
673: val.size = dbuf->len;
1.1 kristaps 674:
1.5 kristaps 675: if (verb)
676: printf("%s: Added index\n", fn);
1.18 kristaps 677:
1.3 kristaps 678: dbt_put(idx, idxf, &key, &val);
679: }
680: }
681:
682: /*
683: * Scan through all entries in the index file `idx' and prune those
684: * entries in `ofile'.
685: * Pruning consists of removing from `db', then invalidating the entry
686: * in `idx' (zeroing its value size).
687: */
688: static void
689: index_prune(const struct of *ofile, DB *db, const char *dbf,
1.27 schwarze 690: DB *idx, const char *idxf, recno_t *maxrec,
691: recno_t **recs, size_t *recsz, size_t *reccur)
1.3 kristaps 692: {
693: const struct of *of;
1.18 kristaps 694: const char *fn, *cp;
1.9 kristaps 695: struct db_val *vbuf;
1.3 kristaps 696: unsigned seq, sseq;
697: DBT key, val;
698: int ch;
699:
1.27 schwarze 700: *reccur = 0;
1.3 kristaps 701: seq = R_FIRST;
702: while (0 == (ch = (*idx->seq)(idx, &key, &val, seq))) {
703: seq = R_NEXT;
704: *maxrec = *(recno_t *)key.data;
1.18 kristaps 705: cp = val.data;
706:
707: /* Deleted records are zero-sized. Skip them. */
708:
709: if (0 == val.size)
710: goto cont;
711:
712: /*
713: * Make sure we're sane.
714: * Read past our mdoc/man/cat type to the next string,
715: * then make sure it's bounded by a NUL.
716: * Failing any of these, we go into our error handler.
717: */
718:
719: if (NULL == (fn = memchr(cp, '\0', val.size)))
720: break;
721: if (++fn - cp >= (int)val.size)
722: break;
723: if (NULL == memchr(fn, '\0', val.size - (fn - cp)))
724: break;
725:
726: /*
727: * Search for the file in those we care about.
728: * XXX: build this into a tree. Too slow.
729: */
1.3 kristaps 730:
731: for (of = ofile; of; of = of->next)
732: if (0 == strcmp(fn, of->fname))
733: break;
734:
735: if (NULL == of)
736: continue;
737:
1.18 kristaps 738: /*
739: * Search through the keyword database, throwing out all
740: * references to our file.
741: */
742:
1.3 kristaps 743: sseq = R_FIRST;
744: while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) {
745: sseq = R_NEXT;
1.18 kristaps 746: if (sizeof(struct db_val) != val.size)
747: break;
748:
1.9 kristaps 749: vbuf = val.data;
1.20 kristaps 750: if (*maxrec != betoh32(vbuf->rec))
1.3 kristaps 751: continue;
1.18 kristaps 752:
753: if ((ch = (*db->del)(db, &key, R_CURSOR)) < 0)
1.3 kristaps 754: break;
755: }
1.18 kristaps 756:
1.3 kristaps 757: if (ch < 0) {
758: perror(dbf);
759: exit((int)MANDOCLEVEL_SYSERR);
1.18 kristaps 760: } else if (1 != ch) {
761: fprintf(stderr, "%s: Corrupt database\n", dbf);
762: exit((int)MANDOCLEVEL_SYSERR);
1.3 kristaps 763: }
1.1 kristaps 764:
1.5 kristaps 765: if (verb)
766: printf("%s: Deleted index\n", fn);
1.1 kristaps 767:
1.3 kristaps 768: val.size = 0;
769: ch = (*idx->put)(idx, &key, &val, R_CURSOR);
1.1 kristaps 770:
1.18 kristaps 771: if (ch < 0)
772: break;
773: cont:
1.27 schwarze 774: if (*reccur >= *recsz) {
1.3 kristaps 775: *recsz += MANDOC_SLOP;
776: *recs = mandoc_realloc
777: (*recs, *recsz * sizeof(recno_t));
778: }
1.1 kristaps 779:
1.27 schwarze 780: (*recs)[(int)*reccur] = *maxrec;
781: (*reccur)++;
1.3 kristaps 782: }
1.18 kristaps 783:
784: if (ch < 0) {
785: perror(idxf);
786: exit((int)MANDOCLEVEL_SYSERR);
787: } else if (1 != ch) {
788: fprintf(stderr, "%s: Corrupt index\n", idxf);
789: exit((int)MANDOCLEVEL_SYSERR);
790: }
791:
1.3 kristaps 792: (*maxrec)++;
1.1 kristaps 793: }
794:
795: /*
796: * Grow the buffer (if necessary) and copy in a binary string.
797: */
798: static void
799: buf_appendb(struct buf *buf, const void *cp, size_t sz)
800: {
801:
802: /* Overshoot by MANDOC_BUFSZ. */
803:
804: while (buf->len + sz >= buf->size) {
805: buf->size = buf->len + sz + MANDOC_BUFSZ;
806: buf->cp = mandoc_realloc(buf->cp, buf->size);
807: }
808:
809: memcpy(buf->cp + (int)buf->len, cp, sz);
810: buf->len += sz;
811: }
812:
813: /*
814: * Append a nil-terminated string to the buffer.
815: * This can be invoked multiple times.
816: * The buffer string will be nil-terminated.
817: * If invoked multiple times, a space is put between strings.
818: */
819: static void
820: buf_append(struct buf *buf, const char *cp)
821: {
822: size_t sz;
823:
824: if (0 == (sz = strlen(cp)))
825: return;
826:
827: if (buf->len)
828: buf->cp[(int)buf->len - 1] = ' ';
829:
830: buf_appendb(buf, cp, sz + 1);
831: }
832:
833: /*
834: * Recursively add all text from a given node.
835: * This is optimised for general mdoc nodes in this context, which do
836: * not consist of subexpressions and having a recursive call for n->next
837: * would be wasteful.
838: * The "f" variable should be 0 unless called from pmdoc_Nd for the
839: * description buffer, which does not start at the beginning of the
840: * buffer.
841: */
842: static void
843: buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f)
844: {
845:
846: for ( ; n; n = n->next) {
847: if (n->child)
848: buf_appendmdoc(buf, n->child, f);
849:
850: if (MDOC_TEXT == n->type && f) {
851: f = 0;
852: buf_appendb(buf, n->string,
853: strlen(n->string) + 1);
854: } else if (MDOC_TEXT == n->type)
855: buf_append(buf, n->string);
856:
857: }
858: }
859:
860: static void
861: hash_reset(DB **db)
862: {
863: DB *hash;
864:
865: if (NULL != (hash = *db))
866: (*hash->close)(hash);
867:
1.5 kristaps 868: *db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL);
1.1 kristaps 869: if (NULL == *db) {
870: perror("hash");
871: exit((int)MANDOCLEVEL_SYSERR);
872: }
873: }
874:
875: /* ARGSUSED */
1.25 schwarze 876: static int
877: pmdoc_head(MDOC_ARGS)
878: {
879:
880: return(MDOC_HEAD == n->type);
881: }
882:
883: /* ARGSUSED */
884: static int
885: pmdoc_body(MDOC_ARGS)
886: {
887:
888: return(MDOC_BODY == n->type);
889: }
890:
891: /* ARGSUSED */
892: static int
1.1 kristaps 893: pmdoc_Fd(MDOC_ARGS)
894: {
895: const char *start, *end;
896: size_t sz;
1.25 schwarze 897:
1.1 kristaps 898: if (SEC_SYNOPSIS != n->sec)
1.25 schwarze 899: return(0);
1.1 kristaps 900: if (NULL == (n = n->child) || MDOC_TEXT != n->type)
1.25 schwarze 901: return(0);
1.1 kristaps 902:
903: /*
904: * Only consider those `Fd' macro fields that begin with an
905: * "inclusion" token (versus, e.g., #define).
906: */
907: if (strcmp("#include", n->string))
1.25 schwarze 908: return(0);
1.1 kristaps 909:
910: if (NULL == (n = n->next) || MDOC_TEXT != n->type)
1.25 schwarze 911: return(0);
1.1 kristaps 912:
913: /*
914: * Strip away the enclosing angle brackets and make sure we're
915: * not zero-length.
916: */
917:
918: start = n->string;
919: if ('<' == *start || '"' == *start)
920: start++;
921:
922: if (0 == (sz = strlen(start)))
1.25 schwarze 923: return(0);
1.1 kristaps 924:
925: end = &start[(int)sz - 1];
926: if ('>' == *end || '"' == *end)
927: end--;
928:
929: assert(end >= start);
930:
931: buf_appendb(buf, start, (size_t)(end - start + 1));
932: buf_appendb(buf, "", 1);
1.25 schwarze 933: return(1);
1.1 kristaps 934: }
935:
936: /* ARGSUSED */
1.25 schwarze 937: static int
938: pmdoc_In(MDOC_ARGS)
1.1 kristaps 939: {
940:
941: if (NULL == n->child || MDOC_TEXT != n->child->type)
1.25 schwarze 942: return(0);
1.1 kristaps 943:
944: buf_append(buf, n->child->string);
1.25 schwarze 945: return(1);
1.1 kristaps 946: }
947:
948: /* ARGSUSED */
1.25 schwarze 949: static int
1.1 kristaps 950: pmdoc_Fn(MDOC_ARGS)
951: {
1.25 schwarze 952: struct mdoc_node *nn;
1.1 kristaps 953: const char *cp;
954:
1.25 schwarze 955: nn = n->child;
956:
957: if (NULL == nn || MDOC_TEXT != nn->type)
958: return(0);
959:
960: /* .Fn "struct type *name" "char *arg" */
1.1 kristaps 961:
1.25 schwarze 962: cp = strrchr(nn->string, ' ');
1.1 kristaps 963: if (NULL == cp)
1.25 schwarze 964: cp = nn->string;
1.1 kristaps 965:
966: /* Strip away pointer symbol. */
967:
968: while ('*' == *cp)
969: cp++;
970:
1.25 schwarze 971: /* Store the function name. */
972:
1.1 kristaps 973: buf_append(buf, cp);
1.8 schwarze 974: hash_put(hash, buf, TYPE_Fn);
1.25 schwarze 975:
976: /* Store the function type. */
977:
978: if (nn->string < cp) {
979: buf->len = 0;
980: buf_appendb(buf, nn->string, cp - nn->string);
981: buf_appendb(buf, "", 1);
982: hash_put(hash, buf, TYPE_Ft);
983: }
984:
985: /* Store the arguments. */
986:
987: for (nn = nn->next; nn; nn = nn->next) {
988: if (MDOC_TEXT != nn->type)
989: continue;
990: buf->len = 0;
991: buf_append(buf, nn->string);
992: hash_put(hash, buf, TYPE_Fa);
993: }
994:
995: return(0);
1.1 kristaps 996: }
997:
998: /* ARGSUSED */
1.25 schwarze 999: static int
1.1 kristaps 1000: pmdoc_St(MDOC_ARGS)
1001: {
1.25 schwarze 1002:
1.1 kristaps 1003: if (NULL == n->child || MDOC_TEXT != n->child->type)
1.25 schwarze 1004: return(0);
1.1 kristaps 1005:
1006: buf_append(buf, n->child->string);
1.25 schwarze 1007: return(1);
1.1 kristaps 1008: }
1009:
1010: /* ARGSUSED */
1.25 schwarze 1011: static int
1.1 kristaps 1012: pmdoc_Xr(MDOC_ARGS)
1013: {
1014:
1015: if (NULL == (n = n->child))
1.25 schwarze 1016: return(0);
1.1 kristaps 1017:
1018: buf_appendb(buf, n->string, strlen(n->string));
1019:
1020: if (NULL != (n = n->next)) {
1021: buf_appendb(buf, ".", 1);
1022: buf_appendb(buf, n->string, strlen(n->string) + 1);
1023: } else
1024: buf_appendb(buf, ".", 2);
1025:
1.25 schwarze 1026: return(1);
1.1 kristaps 1027: }
1028:
1029: /* ARGSUSED */
1.25 schwarze 1030: static int
1.1 kristaps 1031: pmdoc_Nd(MDOC_ARGS)
1032: {
1033:
1034: if (MDOC_BODY != n->type)
1.25 schwarze 1035: return(0);
1.1 kristaps 1036:
1037: buf_appendmdoc(dbuf, n->child, 1);
1.25 schwarze 1038: return(1);
1.1 kristaps 1039: }
1040:
1041: /* ARGSUSED */
1.25 schwarze 1042: static int
1043: pmdoc_Nm(MDOC_ARGS)
1.1 kristaps 1044: {
1045:
1.25 schwarze 1046: if (SEC_NAME == n->sec)
1047: return(1);
1048: else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
1049: return(0);
1.1 kristaps 1050:
1.25 schwarze 1051: if (NULL == n->child)
1052: buf_append(buf, m->name);
1.1 kristaps 1053:
1.25 schwarze 1054: return(1);
1.1 kristaps 1055: }
1056:
1057: /* ARGSUSED */
1.25 schwarze 1058: static int
1059: pmdoc_Sh(MDOC_ARGS)
1.1 kristaps 1060: {
1061:
1.25 schwarze 1062: return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type);
1.1 kristaps 1063: }
1064:
1065: static void
1.9 kristaps 1066: hash_put(DB *db, const struct buf *buf, uint64_t mask)
1.1 kristaps 1067: {
1068: DBT key, val;
1069: int rc;
1070:
1071: if (buf->len < 2)
1072: return;
1073:
1074: key.data = buf->cp;
1075: key.size = buf->len;
1076:
1077: if ((rc = (*db->get)(db, &key, &val, 0)) < 0) {
1078: perror("hash");
1079: exit((int)MANDOCLEVEL_SYSERR);
1080: } else if (0 == rc)
1.9 kristaps 1081: mask |= *(uint64_t *)val.data;
1.1 kristaps 1082:
1083: val.data = &mask;
1.9 kristaps 1084: val.size = sizeof(uint64_t);
1.1 kristaps 1085:
1086: if ((rc = (*db->put)(db, &key, &val, 0)) < 0) {
1087: perror("hash");
1088: exit((int)MANDOCLEVEL_SYSERR);
1089: }
1090: }
1091:
1092: static void
1093: dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
1094: {
1095:
1096: assert(key->size);
1097: assert(val->size);
1098:
1099: if (0 == (*db->put)(db, key, val, 0))
1100: return;
1101:
1102: perror(dbn);
1103: exit((int)MANDOCLEVEL_SYSERR);
1104: /* NOTREACHED */
1105: }
1106:
1107: /*
1108: * Call out to per-macro handlers after clearing the persistent database
1109: * key. If the macro sets the database key, flush it to the database.
1110: */
1111: static void
1112: pmdoc_node(MDOC_ARGS)
1113: {
1114:
1115: if (NULL == n)
1116: return;
1117:
1118: switch (n->type) {
1119: case (MDOC_HEAD):
1120: /* FALLTHROUGH */
1121: case (MDOC_BODY):
1122: /* FALLTHROUGH */
1123: case (MDOC_TAIL):
1124: /* FALLTHROUGH */
1125: case (MDOC_BLOCK):
1126: /* FALLTHROUGH */
1127: case (MDOC_ELEM):
1.25 schwarze 1128: buf->len = 0;
1129:
1130: /*
1131: * Both NULL handlers and handlers returning true
1132: * request using the data. Only skip the element
1133: * when the handler returns false.
1134: */
1135:
1136: if (NULL != mdocs[n->tok].fp &&
1137: 0 == (*mdocs[n->tok].fp)(hash, buf, dbuf, n, m))
1.1 kristaps 1138: break;
1139:
1.25 schwarze 1140: /*
1141: * For many macros, use the text from all children.
1142: * Set zero flags for macros not needing this.
1143: * In that case, the handler must fill the buffer.
1144: */
1145:
1146: if (MDOCF_CHILD & mdocs[n->tok].flags)
1147: buf_appendmdoc(buf, n->child, 0);
1148:
1149: /*
1150: * Cover the most common case:
1151: * Automatically stage one string per element.
1152: * Set a zero mask for macros not needing this.
1153: * Additional staging can be done in the handler.
1154: */
1155:
1156: if (mdocs[n->tok].mask)
1157: hash_put(hash, buf, mdocs[n->tok].mask);
1.1 kristaps 1158: break;
1159: default:
1160: break;
1161: }
1162:
1163: pmdoc_node(hash, buf, dbuf, n->child, m);
1164: pmdoc_node(hash, buf, dbuf, n->next, m);
1165: }
1166:
1167: static int
1168: pman_node(MAN_ARGS)
1169: {
1170: const struct man_node *head, *body;
1171: const char *start, *sv;
1172: size_t sz;
1173:
1174: if (NULL == n)
1175: return(0);
1176:
1177: /*
1178: * We're only searching for one thing: the first text child in
1179: * the BODY of a NAME section. Since we don't keep track of
1180: * sections in -man, run some hoops to find out whether we're in
1181: * the correct section or not.
1182: */
1183:
1184: if (MAN_BODY == n->type && MAN_SH == n->tok) {
1185: body = n;
1186: assert(body->parent);
1187: if (NULL != (head = body->parent->head) &&
1188: 1 == head->nchild &&
1189: NULL != (head = (head->child)) &&
1190: MAN_TEXT == head->type &&
1191: 0 == strcmp(head->string, "NAME") &&
1192: NULL != (body = body->child) &&
1193: MAN_TEXT == body->type) {
1194:
1195: assert(body->string);
1196: start = sv = body->string;
1197:
1198: /*
1199: * Go through a special heuristic dance here.
1200: * This is why -man manuals are great!
1201: * (I'm being sarcastic: my eyes are bleeding.)
1202: * Conventionally, one or more manual names are
1203: * comma-specified prior to a whitespace, then a
1204: * dash, then a description. Try to puzzle out
1205: * the name parts here.
1206: */
1207:
1208: for ( ;; ) {
1209: sz = strcspn(start, " ,");
1210: if ('\0' == start[(int)sz])
1211: break;
1212:
1213: buf->len = 0;
1214: buf_appendb(buf, start, sz);
1215: buf_appendb(buf, "", 1);
1216:
1.8 schwarze 1217: hash_put(hash, buf, TYPE_Nm);
1.1 kristaps 1218:
1219: if (' ' == start[(int)sz]) {
1220: start += (int)sz + 1;
1221: break;
1222: }
1223:
1224: assert(',' == start[(int)sz]);
1225: start += (int)sz + 1;
1226: while (' ' == *start)
1227: start++;
1228: }
1229:
1230: buf->len = 0;
1231:
1232: if (sv == start) {
1233: buf_append(buf, start);
1234: return(1);
1235: }
1236:
1237: while (' ' == *start)
1238: start++;
1239:
1240: if (0 == strncmp(start, "-", 1))
1241: start += 1;
1242: else if (0 == strncmp(start, "\\-", 2))
1243: start += 2;
1244: else if (0 == strncmp(start, "\\(en", 4))
1245: start += 4;
1246: else if (0 == strncmp(start, "\\(em", 4))
1247: start += 4;
1248:
1249: while (' ' == *start)
1250: start++;
1251:
1252: sz = strlen(start) + 1;
1253: buf_appendb(dbuf, start, sz);
1254: buf_appendb(buf, start, sz);
1255:
1.8 schwarze 1256: hash_put(hash, buf, TYPE_Nd);
1.1 kristaps 1257: }
1258: }
1259:
1.7 schwarze 1260: for (n = n->child; n; n = n->next)
1261: if (pman_node(hash, buf, dbuf, n))
1262: return(1);
1.1 kristaps 1263:
1264: return(0);
1265: }
1266:
1.14 schwarze 1267: /*
1268: * Parse a formatted manual page.
1269: * By necessity, this involves rather crude guesswork.
1270: */
1271: static void
1272: pformatted(DB *hash, struct buf *buf, struct buf *dbuf,
1273: const struct of *of)
1274: {
1275: FILE *stream;
1276: char *line, *p;
1277: size_t len, plen;
1278:
1279: if (NULL == (stream = fopen(of->fname, "r"))) {
1280: perror(of->fname);
1281: return;
1282: }
1283:
1284: /*
1285: * Always use the title derived from the filename up front,
1286: * do not even try to find it in the file. This also makes
1287: * sure we don't end up with an orphan index record, even if
1288: * the file content turns out to be completely unintelligible.
1289: */
1290:
1291: buf->len = 0;
1292: buf_append(buf, of->title);
1293: hash_put(hash, buf, TYPE_Nm);
1294:
1.31 schwarze 1295: /* Skip to first blank line. */
1.14 schwarze 1296:
1.28 kristaps 1297: while (NULL != (line = fgetln(stream, &len)))
1.31 schwarze 1298: if ('\n' == *line)
1.28 kristaps 1299: break;
1300:
1.31 schwarze 1301: /*
1302: * Assume the first line that is not indented
1303: * is the first section header. Skip to it.
1.28 kristaps 1304: */
1305:
1306: while (NULL != (line = fgetln(stream, &len)))
1.31 schwarze 1307: if ('\n' != *line && ' ' != *line)
1.28 kristaps 1308: break;
1.14 schwarze 1309:
1310: /*
1.31 schwarze 1311: * If no page content can be found, or the input line
1312: * is already the next section header, or there is no
1313: * trailing newline, reuse the page title as the page
1314: * description.
1.14 schwarze 1315: */
1316:
1.28 kristaps 1317: line = fgetln(stream, &len);
1.31 schwarze 1318: if (NULL == line || ' ' != *line || '\n' != line[(int)len - 1]) {
1.14 schwarze 1319: buf_appendb(dbuf, buf->cp, buf->size);
1320: hash_put(hash, buf, TYPE_Nd);
1321: fclose(stream);
1322: return;
1323: }
1324:
1.28 kristaps 1325: line[(int)--len] = '\0';
1326:
1.31 schwarze 1327: /*
1328: * Skip to the first dash.
1.28 kristaps 1329: * Use the remaining line as the description (no more than 70
1330: * bytes).
1.14 schwarze 1331: */
1332:
1.30 kristaps 1333: if (NULL != (p = strstr(line, "- "))) {
1334: for (p += 2; ' ' == *p || '\b' == *p; p++)
1.28 kristaps 1335: /* Skip to next word. */ ;
1336: } else
1.14 schwarze 1337: p = line;
1.28 kristaps 1338:
1339: if ((plen = strlen(p)) > 70) {
1340: plen = 70;
1341: p[plen] = '\0';
1.29 kristaps 1342: }
1343:
1344: /* Strip backspace-encoding from line. */
1345:
1346: while (NULL != (line = memchr(p, '\b', plen))) {
1347: len = line - p;
1348: if (0 == len) {
1349: memmove(line, line + 1, plen--);
1350: continue;
1351: }
1352: memmove(line - 1, line + 1, plen - len);
1353: plen -= 2;
1.14 schwarze 1354: }
1355:
1.28 kristaps 1356: buf_appendb(dbuf, p, plen + 1);
1.14 schwarze 1357: buf->len = 0;
1.28 kristaps 1358: buf_appendb(buf, p, plen + 1);
1.14 schwarze 1359: hash_put(hash, buf, TYPE_Nd);
1.28 kristaps 1360: fclose(stream);
1.14 schwarze 1361: }
1362:
1.5 kristaps 1363: static void
1.16 schwarze 1364: ofile_argbuild(int argc, char *argv[], struct of **of)
1.5 kristaps 1365: {
1.12 schwarze 1366: char buf[MAXPATHLEN];
1367: char *sec, *arch, *title, *p;
1.14 schwarze 1368: int i, src_form;
1.5 kristaps 1369: struct of *nof;
1370:
1371: for (i = 0; i < argc; i++) {
1.12 schwarze 1372:
1373: /*
1374: * Try to infer the manual section, architecture and
1375: * page title from the path, assuming it looks like
1.14 schwarze 1376: * man*[/<arch>]/<title>.<section> or
1377: * cat<section>[/<arch>]/<title>.0
1.12 schwarze 1378: */
1379:
1380: if (strlcpy(buf, argv[i], sizeof(buf)) >= sizeof(buf)) {
1381: fprintf(stderr, "%s: Path too long\n", argv[i]);
1382: continue;
1383: }
1384: sec = arch = title = NULL;
1.14 schwarze 1385: src_form = 0;
1.12 schwarze 1386: p = strrchr(buf, '\0');
1387: while (p-- > buf) {
1388: if (NULL == sec && '.' == *p) {
1389: sec = p + 1;
1390: *p = '\0';
1.14 schwarze 1391: if ('0' == *sec)
1392: src_form |= MANDOC_FORM;
1393: else if ('1' <= *sec && '9' >= *sec)
1394: src_form |= MANDOC_SRC;
1.12 schwarze 1395: continue;
1396: }
1397: if ('/' != *p)
1398: continue;
1399: if (NULL == title) {
1400: title = p + 1;
1401: *p = '\0';
1402: continue;
1403: }
1.24 schwarze 1404: if (0 == strncmp("man", p + 1, 3))
1.14 schwarze 1405: src_form |= MANDOC_SRC;
1.24 schwarze 1406: else if (0 == strncmp("cat", p + 1, 3))
1.14 schwarze 1407: src_form |= MANDOC_FORM;
1.24 schwarze 1408: else
1.12 schwarze 1409: arch = p + 1;
1410: break;
1411: }
1412: if (NULL == title)
1413: title = buf;
1414:
1415: /*
1416: * Build the file structure.
1417: */
1418:
1.5 kristaps 1419: nof = mandoc_calloc(1, sizeof(struct of));
1.12 schwarze 1420: nof->fname = mandoc_strdup(argv[i]);
1421: if (NULL != sec)
1422: nof->sec = mandoc_strdup(sec);
1423: if (NULL != arch)
1424: nof->arch = mandoc_strdup(arch);
1425: nof->title = mandoc_strdup(title);
1.14 schwarze 1426: nof->src_form = src_form;
1.12 schwarze 1427:
1428: /*
1429: * Add the structure to the list.
1430: */
1431:
1.5 kristaps 1432: if (verb > 2)
1433: printf("%s: Scheduling\n", argv[i]);
1434: if (NULL == *of) {
1435: *of = nof;
1436: (*of)->first = nof;
1437: } else {
1438: nof->first = (*of)->first;
1439: (*of)->next = nof;
1440: *of = nof;
1441: }
1442: }
1443: }
1444:
1.4 kristaps 1445: /*
1446: * Recursively build up a list of files to parse.
1447: * We use this instead of ftw() and so on because I don't want global
1448: * variables hanging around.
1449: * This ignores the mandoc.db and mandoc.index files, but assumes that
1450: * everything else is a manual.
1451: * Pass in a pointer to a NULL structure for the first invocation.
1452: */
1453: static int
1.12 schwarze 1454: ofile_dirbuild(const char *dir, const char* psec, const char *parch,
1.16 schwarze 1455: int p_src_form, struct of **of)
1.4 kristaps 1456: {
1.5 kristaps 1457: char buf[MAXPATHLEN];
1.14 schwarze 1458: struct stat sb;
1.5 kristaps 1459: size_t sz;
1.4 kristaps 1460: DIR *d;
1.12 schwarze 1461: const char *fn, *sec, *arch;
1.14 schwarze 1462: char *p, *q, *suffix;
1.4 kristaps 1463: struct of *nof;
1464: struct dirent *dp;
1.14 schwarze 1465: int src_form;
1.4 kristaps 1466:
1467: if (NULL == (d = opendir(dir))) {
1468: perror(dir);
1469: return(0);
1470: }
1471:
1472: while (NULL != (dp = readdir(d))) {
1473: fn = dp->d_name;
1.12 schwarze 1474:
1475: if ('.' == *fn)
1476: continue;
1477:
1.14 schwarze 1478: src_form = p_src_form;
1479:
1.4 kristaps 1480: if (DT_DIR == dp->d_type) {
1.12 schwarze 1481: sec = psec;
1482: arch = parch;
1483:
1484: /*
1485: * By default, only use directories called:
1.14 schwarze 1486: * man<section>/[<arch>/] or
1487: * cat<section>/[<arch>/]
1.12 schwarze 1488: */
1489:
1490: if (NULL == sec) {
1.14 schwarze 1491: if(0 == strncmp("man", fn, 3)) {
1492: src_form |= MANDOC_SRC;
1.12 schwarze 1493: sec = fn + 3;
1.14 schwarze 1494: } else if (0 == strncmp("cat", fn, 3)) {
1495: src_form |= MANDOC_FORM;
1496: sec = fn + 3;
1497: } else if (use_all)
1.12 schwarze 1498: sec = fn;
1499: else
1500: continue;
1501: } else if (NULL == arch && (use_all ||
1502: NULL == strchr(fn, '.')))
1503: arch = fn;
1504: else if (0 == use_all)
1.5 kristaps 1505: continue;
1506:
1507: buf[0] = '\0';
1508: strlcat(buf, dir, MAXPATHLEN);
1509: strlcat(buf, "/", MAXPATHLEN);
1510: sz = strlcat(buf, fn, MAXPATHLEN);
1511:
1.12 schwarze 1512: if (MAXPATHLEN <= sz) {
1513: fprintf(stderr, "%s: Path too long\n", dir);
1514: return(0);
1515: }
1516:
1517: if (verb > 2)
1518: printf("%s: Scanning\n", buf);
1519:
1520: if ( ! ofile_dirbuild(buf, sec, arch,
1.16 schwarze 1521: src_form, of))
1.12 schwarze 1522: return(0);
1523: }
1524: if (DT_REG != dp->d_type ||
1525: (NULL == psec && !use_all) ||
1526: !strcmp(MANDOC_DB, fn) ||
1527: !strcmp(MANDOC_IDX, fn))
1528: continue;
1529:
1530: /*
1531: * By default, skip files where the file name suffix
1532: * does not agree with the section directory
1533: * they are located in.
1534: */
1535:
1536: suffix = strrchr(fn, '.');
1537: if (0 == use_all) {
1538: if (NULL == suffix)
1.5 kristaps 1539: continue;
1.14 schwarze 1540: if ((MANDOC_SRC & src_form &&
1541: strcmp(suffix + 1, psec)) ||
1542: (MANDOC_FORM & src_form &&
1543: strcmp(suffix + 1, "0")))
1544: continue;
1545: }
1546: if (NULL != suffix) {
1547: if ('0' == suffix[1])
1548: src_form |= MANDOC_FORM;
1549: else if ('1' <= suffix[1] && '9' >= suffix[1])
1550: src_form |= MANDOC_SRC;
1551: }
1552:
1553:
1554: /*
1555: * Skip formatted manuals if a source version is
1556: * available. Ignore the age: it is very unlikely
1557: * that people install newer formatted base manuals
1558: * when they used to have source manuals before,
1559: * and in ports, old manuals get removed on update.
1560: */
1561: if (0 == use_all && MANDOC_FORM & src_form &&
1562: NULL != psec) {
1563: buf[0] = '\0';
1564: strlcat(buf, dir, MAXPATHLEN);
1565: p = strrchr(buf, '/');
1.32 schwarze 1566: if (NULL != parch && NULL != p)
1567: for (p--; p > buf; p--)
1568: if ('/' == *p)
1569: break;
1.14 schwarze 1570: if (NULL == p)
1571: p = buf;
1572: else
1573: p++;
1574: if (0 == strncmp("cat", p, 3))
1575: memcpy(p, "man", 3);
1576: strlcat(buf, "/", MAXPATHLEN);
1577: sz = strlcat(buf, fn, MAXPATHLEN);
1578: if (sz >= MAXPATHLEN) {
1579: fprintf(stderr, "%s: Path too long\n", buf);
1.5 kristaps 1580: continue;
1.14 schwarze 1581: }
1582: q = strrchr(buf, '.');
1583: if (NULL != q && p < q++) {
1584: *q = '\0';
1585: sz = strlcat(buf, psec, MAXPATHLEN);
1586: if (sz >= MAXPATHLEN) {
1587: fprintf(stderr,
1588: "%s: Path too long\n", buf);
1589: continue;
1590: }
1591: if (0 == stat(buf, &sb))
1592: continue;
1593: }
1.5 kristaps 1594: }
1.4 kristaps 1595:
1.5 kristaps 1596: buf[0] = '\0';
1597: strlcat(buf, dir, MAXPATHLEN);
1598: strlcat(buf, "/", MAXPATHLEN);
1.6 schwarze 1599: sz = strlcat(buf, fn, MAXPATHLEN);
1.5 kristaps 1600: if (sz >= MAXPATHLEN) {
1601: fprintf(stderr, "%s: Path too long\n", dir);
1.14 schwarze 1602: continue;
1.5 kristaps 1603: }
1604:
1.4 kristaps 1605: nof = mandoc_calloc(1, sizeof(struct of));
1.5 kristaps 1606: nof->fname = mandoc_strdup(buf);
1.12 schwarze 1607: if (NULL != psec)
1608: nof->sec = mandoc_strdup(psec);
1609: if (NULL != parch)
1610: nof->arch = mandoc_strdup(parch);
1.14 schwarze 1611: nof->src_form = src_form;
1.12 schwarze 1612:
1613: /*
1614: * Remember the file name without the extension,
1615: * to be used as the page title in the database.
1616: */
1617:
1618: if (NULL != suffix)
1619: *suffix = '\0';
1620: nof->title = mandoc_strdup(fn);
1.5 kristaps 1621:
1.14 schwarze 1622: /*
1623: * Add the structure to the list.
1624: */
1625:
1.5 kristaps 1626: if (verb > 2)
1627: printf("%s: Scheduling\n", buf);
1.4 kristaps 1628: if (NULL == *of) {
1629: *of = nof;
1630: (*of)->first = nof;
1631: } else {
1.5 kristaps 1632: nof->first = (*of)->first;
1.4 kristaps 1633: (*of)->next = nof;
1634: *of = nof;
1635: }
1636: }
1637:
1.7 schwarze 1638: closedir(d);
1.4 kristaps 1639: return(1);
1640: }
1641:
1642: static void
1643: ofile_free(struct of *of)
1644: {
1645: struct of *nof;
1646:
1647: while (of) {
1648: nof = of->next;
1649: free(of->fname);
1.12 schwarze 1650: free(of->sec);
1651: free(of->arch);
1652: free(of->title);
1.4 kristaps 1653: free(of);
1654: of = nof;
1655: }
1656: }
1657:
1.1 kristaps 1658: static void
1659: usage(void)
1660: {
1661:
1.5 kristaps 1662: fprintf(stderr, "usage: %s [-v] "
1663: "[-d dir [files...] |"
1664: " -u dir [files...] |"
1665: " dir...]\n", progname);
1.1 kristaps 1666: }
CVSweb