Annotation of mandoc/mandocdb.c, Revision 1.8
1.8 ! schwarze 1: /* $Id: mandocdb.c,v 1.7 2011/11/13 00:53:13 schwarze Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #ifdef HAVE_CONFIG_H
18: #include "config.h"
19: #endif
20:
21: #include <sys/param.h>
22:
23: #include <assert.h>
1.4 kristaps 24: #include <dirent.h>
1.1 kristaps 25: #include <fcntl.h>
26: #include <getopt.h>
27: #include <stdio.h>
28: #include <stdint.h>
29: #include <stdlib.h>
30: #include <string.h>
31:
32: #ifdef __linux__
33: # include <db_185.h>
34: #else
35: # include <db.h>
36: #endif
37:
38: #include "man.h"
39: #include "mdoc.h"
40: #include "mandoc.h"
1.8 ! schwarze 41: #include "mandocdb.h"
1.1 kristaps 42:
43: #define MANDOC_BUFSZ BUFSIZ
44: #define MANDOC_SLOP 1024
45:
1.5 kristaps 46: /* Tiny list for files. No need to bring in QUEUE. */
47:
/*
 * One file scheduled for processing.  The list builders append at the
 * tail and hand back the tail node; each node stores the head of the
 * list in `first' so that callers can rewind with `of = of->first'.
 */
1.3 kristaps 48: struct of {
1.5 kristaps 49: char *fname; /* heap-allocated */
50: struct of *next; /* NULL for last one */
51: struct of *first; /* first in list */
1.3 kristaps 52: };
53:
1.1 kristaps 54: /* Buffer for storing growable data. */
55:
/*
 * `cp' is reallocated by buf_appendb() as needed; callers reset a
 * buffer for reuse by setting `len' to zero.
 */
56: struct buf {
57: char *cp;
1.5 kristaps 58: size_t len; /* current length */
59: size_t size; /* total buffer size */
1.1 kristaps 60: };
61:
62: /* Operation we're going to perform. */
63:
/*
 * OP_NEW is the default; main() selects OP_UPDATE for -d and OP_DELETE
 * for -u.
 */
64: enum op {
65: OP_NEW = 0, /* new database */
1.5 kristaps 66: OP_UPDATE, /* delete/add entries in existing database */
1.1 kristaps 67: OP_DELETE /* delete entries from existing database */
68: };
69:
/*
 * Common parameter lists of the node handlers:
 * `hash' is the per-file table of pending keywords, `buf' the scratch
 * keyword buffer, `dbuf' the index record under construction and `n'
 * the node being visited.  The mdoc variant additionally receives the
 * document meta-data `m'.
 */
70: #define MAN_ARGS DB *hash, \
71: struct buf *buf, \
72: struct buf *dbuf, \
73: const struct man_node *n
74: #define MDOC_ARGS DB *hash, \
75: struct buf *buf, \
76: struct buf *dbuf, \
77: const struct mdoc_node *n, \
78: const struct mdoc_meta *m
79:
/*
 * Forward declarations of the file-local helpers: buffer handling
 * (buf_*), database access (dbt_put, hash_*), index maintenance
 * (index_*), file list management (ofile_*), and the per-macro keyword
 * extractors (pman_node, pmdoc_*).
 */
80: static void buf_appendmdoc(struct buf *,
81: const struct mdoc_node *, int);
82: static void buf_append(struct buf *, const char *);
83: static void buf_appendb(struct buf *,
84: const void *, size_t);
85: static void dbt_put(DB *, const char *, DBT *, DBT *);
86: static void hash_put(DB *, const struct buf *, int);
87: static void hash_reset(DB **);
1.3 kristaps 88: static void index_merge(const struct of *, struct mparse *,
89: struct buf *, struct buf *,
90: DB *, DB *, const char *,
1.5 kristaps 91: DB *, const char *, int,
1.3 kristaps 92: recno_t, const recno_t *, size_t);
93: static void index_prune(const struct of *, DB *,
94: const char *, DB *, const char *,
1.5 kristaps 95: int, recno_t *, recno_t **, size_t *);
96: static void ofile_argbuild(char *[], int, int, struct of **);
97: static int ofile_dirbuild(const char *, int, struct of **);
1.4 kristaps 98: static void ofile_free(struct of *);
1.1 kristaps 99: static int pman_node(MAN_ARGS);
100: static void pmdoc_node(MDOC_ARGS);
101: static void pmdoc_An(MDOC_ARGS);
102: static void pmdoc_Cd(MDOC_ARGS);
103: static void pmdoc_Er(MDOC_ARGS);
104: static void pmdoc_Ev(MDOC_ARGS);
105: static void pmdoc_Fd(MDOC_ARGS);
106: static void pmdoc_In(MDOC_ARGS);
107: static void pmdoc_Fn(MDOC_ARGS);
108: static void pmdoc_Fo(MDOC_ARGS);
109: static void pmdoc_Nd(MDOC_ARGS);
110: static void pmdoc_Nm(MDOC_ARGS);
111: static void pmdoc_Pa(MDOC_ARGS);
112: static void pmdoc_St(MDOC_ARGS);
113: static void pmdoc_Vt(MDOC_ARGS);
114: static void pmdoc_Xr(MDOC_ARGS);
115: static void usage(void);
116:
/* Signature shared by all per-macro mdoc keyword extractors. */
117: typedef void (*pmdoc_nf)(MDOC_ARGS);
118:
/*
 * Dispatch table consulted by pmdoc_node(), indexed by the node's macro
 * token (n->tok).  A NULL entry means the macro contributes no keywords.
 * Note that `Va' and `Vt' share one handler.
 */
119: static const pmdoc_nf mdocs[MDOC_MAX] = {
120: NULL, /* Ap */
121: NULL, /* Dd */
122: NULL, /* Dt */
123: NULL, /* Os */
124: NULL, /* Sh */
125: NULL, /* Ss */
126: NULL, /* Pp */
127: NULL, /* D1 */
128: NULL, /* Dl */
129: NULL, /* Bd */
130: NULL, /* Ed */
131: NULL, /* Bl */
132: NULL, /* El */
133: NULL, /* It */
134: NULL, /* Ad */
135: pmdoc_An, /* An */
136: NULL, /* Ar */
137: pmdoc_Cd, /* Cd */
138: NULL, /* Cm */
139: NULL, /* Dv */
140: pmdoc_Er, /* Er */
141: pmdoc_Ev, /* Ev */
142: NULL, /* Ex */
143: NULL, /* Fa */
144: pmdoc_Fd, /* Fd */
145: NULL, /* Fl */
146: pmdoc_Fn, /* Fn */
147: NULL, /* Ft */
148: NULL, /* Ic */
149: pmdoc_In, /* In */
150: NULL, /* Li */
151: pmdoc_Nd, /* Nd */
152: pmdoc_Nm, /* Nm */
153: NULL, /* Op */
154: NULL, /* Ot */
155: pmdoc_Pa, /* Pa */
156: NULL, /* Rv */
157: pmdoc_St, /* St */
158: pmdoc_Vt, /* Va */
159: pmdoc_Vt, /* Vt */
160: pmdoc_Xr, /* Xr */
161: NULL, /* %A */
162: NULL, /* %B */
163: NULL, /* %D */
164: NULL, /* %I */
165: NULL, /* %J */
166: NULL, /* %N */
167: NULL, /* %O */
168: NULL, /* %P */
169: NULL, /* %R */
170: NULL, /* %T */
171: NULL, /* %V */
172: NULL, /* Ac */
173: NULL, /* Ao */
174: NULL, /* Aq */
175: NULL, /* At */
176: NULL, /* Bc */
177: NULL, /* Bf */
178: NULL, /* Bo */
179: NULL, /* Bq */
180: NULL, /* Bsx */
181: NULL, /* Bx */
182: NULL, /* Db */
183: NULL, /* Dc */
184: NULL, /* Do */
185: NULL, /* Dq */
186: NULL, /* Ec */
187: NULL, /* Ef */
188: NULL, /* Em */
189: NULL, /* Eo */
190: NULL, /* Fx */
191: NULL, /* Ms */
192: NULL, /* No */
193: NULL, /* Ns */
194: NULL, /* Nx */
195: NULL, /* Ox */
196: NULL, /* Pc */
197: NULL, /* Pf */
198: NULL, /* Po */
199: NULL, /* Pq */
200: NULL, /* Qc */
201: NULL, /* Ql */
202: NULL, /* Qo */
203: NULL, /* Qq */
204: NULL, /* Re */
205: NULL, /* Rs */
206: NULL, /* Sc */
207: NULL, /* So */
208: NULL, /* Sq */
209: NULL, /* Sm */
210: NULL, /* Sx */
211: NULL, /* Sy */
212: NULL, /* Tn */
213: NULL, /* Ux */
214: NULL, /* Xc */
215: NULL, /* Xo */
216: pmdoc_Fo, /* Fo */
217: NULL, /* Fc */
218: NULL, /* Oo */
219: NULL, /* Oc */
220: NULL, /* Bk */
221: NULL, /* Ek */
222: NULL, /* Bt */
223: NULL, /* Hf */
224: NULL, /* Fr */
225: NULL, /* Ud */
226: NULL, /* Lb */
227: NULL, /* Lp */
228: NULL, /* Lk */
229: NULL, /* Mt */
230: NULL, /* Brq */
231: NULL, /* Bro */
232: NULL, /* Brc */
233: NULL, /* %C */
234: NULL, /* Es */
235: NULL, /* En */
236: NULL, /* Dx */
237: NULL, /* %Q */
238: NULL, /* br */
239: NULL, /* sp */
240: NULL, /* %U */
241: NULL, /* Ta */
242: };
243:
/* Basename of argv[0]; set at the top of main() and printed by usage(). */
244: static const char *progname;
245:
246: int
247: main(int argc, char *argv[])
248: {
249: struct mparse *mp; /* parse sequence */
250: enum op op; /* current operation */
1.5 kristaps 251: const char *dir;
1.1 kristaps 252: char ibuf[MAXPATHLEN], /* index fname */
1.3 kristaps 253: fbuf[MAXPATHLEN]; /* btree fname */
1.5 kristaps 254: int verb, /* output verbosity */
255: ch, i, flags;
1.1 kristaps 256: DB *idx, /* index database */
257: *db, /* keyword database */
258: *hash; /* temporary keyword hashtable */
259: BTREEINFO info; /* btree configuration */
1.3 kristaps 260: recno_t maxrec; /* supremum of all records */
1.1 kristaps 261: recno_t *recs; /* buffer of empty records */
1.5 kristaps 262: size_t sz1, sz2,
263: recsz, /* buffer size of recs */
1.1 kristaps 264: reccur; /* valid number of recs */
265: struct buf buf, /* keyword buffer */
266: dbuf; /* description buffer */
1.5 kristaps 267: struct of *of; /* list of files for processing */
1.1 kristaps 268: extern int optind;
269: extern char *optarg;
270:
271: progname = strrchr(argv[0], '/');
272: if (progname == NULL)
273: progname = argv[0];
274: else
275: ++progname;
276:
1.5 kristaps 277: verb = 0;
1.4 kristaps 278: of = NULL;
1.1 kristaps 279: db = idx = NULL;
280: mp = NULL;
281: hash = NULL;
282: recs = NULL;
283: recsz = reccur = 0;
284: maxrec = 0;
285: op = OP_NEW;
1.5 kristaps 286: dir = NULL;
1.1 kristaps 287:
1.5 kristaps 288: while (-1 != (ch = getopt(argc, argv, "d:u:v")))
1.1 kristaps 289: switch (ch) {
1.5 kristaps 290: case ('d'):
291: dir = optarg;
292: op = OP_UPDATE;
293: break;
294: case ('u'):
295: dir = optarg;
296: op = OP_DELETE;
297: break;
298: case ('v'):
299: verb++;
300: break;
1.1 kristaps 301: default:
302: usage();
303: return((int)MANDOCLEVEL_BADARG);
304: }
305:
306: argc -= optind;
307: argv += optind;
308:
1.4 kristaps 309: memset(&info, 0, sizeof(BTREEINFO));
310: info.flags = R_DUP;
1.1 kristaps 311:
1.4 kristaps 312: mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
1.1 kristaps 313:
1.5 kristaps 314: memset(&buf, 0, sizeof(struct buf));
315: memset(&dbuf, 0, sizeof(struct buf));
1.1 kristaps 316:
1.4 kristaps 317: buf.size = dbuf.size = MANDOC_BUFSZ;
1.1 kristaps 318:
1.4 kristaps 319: buf.cp = mandoc_malloc(buf.size);
320: dbuf.cp = mandoc_malloc(dbuf.size);
1.1 kristaps 321:
1.5 kristaps 322: flags = OP_NEW == op ? O_CREAT|O_TRUNC|O_RDWR : O_CREAT|O_RDWR;
323:
324: if (OP_UPDATE == op || OP_DELETE == op) {
325: ibuf[0] = fbuf[0] = '\0';
326:
327: strlcat(fbuf, dir, MAXPATHLEN);
328: strlcat(fbuf, "/", MAXPATHLEN);
329: sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
330:
331: strlcat(ibuf, dir, MAXPATHLEN);
332: strlcat(ibuf, "/", MAXPATHLEN);
333: sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
334:
335: if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
336: fprintf(stderr, "%s: Path too long\n", dir);
337: exit((int)MANDOCLEVEL_BADARG);
338: }
339:
340: db = dbopen(fbuf, flags, 0644, DB_BTREE, &info);
341: idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL);
342:
343: if (NULL == db) {
344: perror(fbuf);
345: exit((int)MANDOCLEVEL_SYSERR);
346: } else if (NULL == db) {
347: perror(ibuf);
348: exit((int)MANDOCLEVEL_SYSERR);
349: }
350:
351: if (verb > 2) {
352: printf("%s: Opened\n", fbuf);
353: printf("%s: Opened\n", ibuf);
354: }
355:
356: ofile_argbuild(argv, argc, verb, &of);
357: if (NULL == of)
358: goto out;
359:
360: of = of->first;
361:
362: index_prune(of, db, fbuf, idx, ibuf, verb,
363: &maxrec, &recs, &recsz);
364:
365: if (OP_UPDATE == op)
366: index_merge(of, mp, &dbuf, &buf, hash,
367: db, fbuf, idx, ibuf, verb,
368: maxrec, recs, reccur);
369:
370: goto out;
371: }
372:
1.4 kristaps 373: for (i = 0; i < argc; i++) {
1.5 kristaps 374: ibuf[0] = fbuf[0] = '\0';
1.1 kristaps 375:
1.4 kristaps 376: strlcat(fbuf, argv[i], MAXPATHLEN);
1.5 kristaps 377: strlcat(fbuf, "/", MAXPATHLEN);
378: sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
1.1 kristaps 379:
1.4 kristaps 380: strlcat(ibuf, argv[i], MAXPATHLEN);
1.5 kristaps 381: strlcat(ibuf, "/", MAXPATHLEN);
382: sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
1.1 kristaps 383:
1.5 kristaps 384: if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
1.4 kristaps 385: fprintf(stderr, "%s: Path too long\n", argv[i]);
1.5 kristaps 386: exit((int)MANDOCLEVEL_BADARG);
1.4 kristaps 387: }
1.3 kristaps 388:
1.4 kristaps 389: db = dbopen(fbuf, flags, 0644, DB_BTREE, &info);
390: idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL);
1.3 kristaps 391:
1.4 kristaps 392: if (NULL == db) {
393: perror(fbuf);
1.5 kristaps 394: exit((int)MANDOCLEVEL_SYSERR);
1.4 kristaps 395: } else if (NULL == db) {
396: perror(ibuf);
1.5 kristaps 397: exit((int)MANDOCLEVEL_SYSERR);
398: }
399:
400: if (verb > 2) {
401: printf("%s: Truncated\n", fbuf);
402: printf("%s: Truncated\n", ibuf);
1.4 kristaps 403: }
1.1 kristaps 404:
1.4 kristaps 405: ofile_free(of);
406: of = NULL;
1.1 kristaps 407:
1.5 kristaps 408: if ( ! ofile_dirbuild(argv[i], verb, &of))
409: exit((int)MANDOCLEVEL_SYSERR);
1.1 kristaps 410:
1.5 kristaps 411: if (NULL == of)
412: continue;
1.1 kristaps 413:
1.5 kristaps 414: of = of->first;
1.1 kristaps 415:
1.5 kristaps 416: index_merge(of, mp, &dbuf, &buf, hash, db, fbuf,
417: idx, ibuf, verb, maxrec, recs, reccur);
1.4 kristaps 418: }
1.3 kristaps 419:
1.5 kristaps 420: out:
1.3 kristaps 421: if (db)
422: (*db->close)(db);
423: if (idx)
424: (*idx->close)(idx);
425: if (hash)
426: (*hash->close)(hash);
427: if (mp)
428: mparse_free(mp);
429:
1.4 kristaps 430: ofile_free(of);
1.3 kristaps 431: free(buf.cp);
432: free(dbuf.cp);
433: free(recs);
434:
1.5 kristaps 435: return(MANDOCLEVEL_OK);
1.3 kristaps 436: }
437:
438: void
439: index_merge(const struct of *of, struct mparse *mp,
440: struct buf *dbuf, struct buf *buf,
441: DB *hash, DB *db, const char *dbf,
1.5 kristaps 442: DB *idx, const char *idxf, int verb,
1.3 kristaps 443: recno_t maxrec, const recno_t *recs, size_t reccur)
444: {
445: recno_t rec;
446: int ch;
447: DBT key, val;
448: struct mdoc *mdoc;
449: struct man *man;
450: const char *fn, *msec, *mtitle, *arch;
451: size_t sv;
452: unsigned seq;
453: char vbuf[8];
454:
455: for (rec = 0; of; of = of->next) {
456: fn = of->fname;
457: if (reccur > 0) {
458: --reccur;
459: rec = recs[(int)reccur];
460: } else if (maxrec > 0) {
461: rec = maxrec;
462: maxrec = 0;
1.1 kristaps 463: } else
464: rec++;
465:
466: mparse_reset(mp);
467: hash_reset(&hash);
468:
469: if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) {
470: fprintf(stderr, "%s: Parse failure\n", fn);
471: continue;
472: }
473:
474: mparse_result(mp, &mdoc, &man);
475: if (NULL == mdoc && NULL == man)
476: continue;
477:
478: msec = NULL != mdoc ?
479: mdoc_meta(mdoc)->msec : man_meta(man)->msec;
480: mtitle = NULL != mdoc ?
481: mdoc_meta(mdoc)->title : man_meta(man)->title;
1.3 kristaps 482: arch = NULL != mdoc ?
483: mdoc_meta(mdoc)->arch : NULL;
1.1 kristaps 484:
485: if (NULL == arch)
486: arch = "";
487:
488: /*
489: * The index record value consists of a nil-terminated
490: * filename, a nil-terminated manual section, and a
491: * nil-terminated description. Since the description
492: * may not be set, we set a sentinel to see if we're
493: * going to write a nil byte in its place.
494: */
495:
1.3 kristaps 496: dbuf->len = 0;
497: buf_appendb(dbuf, fn, strlen(fn) + 1);
498: buf_appendb(dbuf, msec, strlen(msec) + 1);
499: buf_appendb(dbuf, mtitle, strlen(mtitle) + 1);
500: buf_appendb(dbuf, arch, strlen(arch) + 1);
1.1 kristaps 501:
1.3 kristaps 502: sv = dbuf->len;
1.1 kristaps 503:
504: /* Fix the record number in the btree value. */
505:
506: if (mdoc)
1.3 kristaps 507: pmdoc_node(hash, buf, dbuf,
1.1 kristaps 508: mdoc_node(mdoc), mdoc_meta(mdoc));
509: else
1.3 kristaps 510: pman_node(hash, buf, dbuf, man_node(man));
1.1 kristaps 511:
512: /*
513: * Copy from the in-memory hashtable of pending keywords
514: * into the database.
515: */
516:
517: memset(vbuf, 0, sizeof(uint32_t));
518: memcpy(vbuf + 4, &rec, sizeof(uint32_t));
519:
520: seq = R_FIRST;
521: while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
522: seq = R_NEXT;
523:
524: memcpy(vbuf, val.data, sizeof(uint32_t));
525: val.size = sizeof(vbuf);
526: val.data = vbuf;
527:
1.5 kristaps 528: if (verb > 1)
529: printf("%s: Added keyword: %s\n",
530: fn, (char *)key.data);
1.3 kristaps 531: dbt_put(db, dbf, &key, &val);
1.1 kristaps 532: }
533: if (ch < 0) {
534: perror("hash");
535: exit((int)MANDOCLEVEL_SYSERR);
536: }
537:
538: /*
539: * Apply to the index. If we haven't had a description
540: * set, put an empty one in now.
541: */
542:
1.3 kristaps 543: if (dbuf->len == sv)
544: buf_appendb(dbuf, "", 1);
1.1 kristaps 545:
546: key.data = &rec;
547: key.size = sizeof(recno_t);
548:
1.3 kristaps 549: val.data = dbuf->cp;
550: val.size = dbuf->len;
1.1 kristaps 551:
1.5 kristaps 552: if (verb)
553: printf("%s: Added index\n", fn);
1.3 kristaps 554: dbt_put(idx, idxf, &key, &val);
555: }
556: }
557:
558: /*
559: * Scan through all entries in the index file `idx' and prune those
560: * entries in `ofile'.
561: * Pruning consists of removing from `db', then invalidating the entry
562: * in `idx' (zeroing its value size).
563: */
564: static void
565: index_prune(const struct of *ofile, DB *db, const char *dbf,
1.5 kristaps 566: DB *idx, const char *idxf, int verb,
1.3 kristaps 567: recno_t *maxrec, recno_t **recs, size_t *recsz)
568: {
569: const struct of *of;
570: const char *fn;
571: unsigned seq, sseq;
572: DBT key, val;
573: size_t reccur;
574: int ch;
575:
576: reccur = 0;
577: seq = R_FIRST;
578: while (0 == (ch = (*idx->seq)(idx, &key, &val, seq))) {
579: seq = R_NEXT;
580: *maxrec = *(recno_t *)key.data;
581: if (0 == val.size) {
582: if (reccur >= *recsz) {
583: *recsz += MANDOC_SLOP;
584: *recs = mandoc_realloc(*recs,
585: *recsz * sizeof(recno_t));
586: }
587: (*recs)[(int)reccur] = *maxrec;
588: reccur++;
589: continue;
590: }
591:
592: fn = (char *)val.data;
593: for (of = ofile; of; of = of->next)
594: if (0 == strcmp(fn, of->fname))
595: break;
596:
597: if (NULL == of)
598: continue;
599:
600: sseq = R_FIRST;
601: while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) {
602: sseq = R_NEXT;
603: assert(8 == val.size);
604: if (*maxrec != *(recno_t *)(val.data + 4))
605: continue;
1.5 kristaps 606: if (verb)
607: printf("%s: Deleted keyword: %s\n",
608: fn, (char *)key.data);
1.3 kristaps 609: ch = (*db->del)(db, &key, R_CURSOR);
610: if (ch < 0)
611: break;
612: }
613: if (ch < 0) {
614: perror(dbf);
615: exit((int)MANDOCLEVEL_SYSERR);
616: }
1.1 kristaps 617:
1.5 kristaps 618: if (verb)
619: printf("%s: Deleted index\n", fn);
1.1 kristaps 620:
1.3 kristaps 621: val.size = 0;
622: ch = (*idx->put)(idx, &key, &val, R_CURSOR);
623: if (ch < 0) {
624: perror(idxf);
625: exit((int)MANDOCLEVEL_SYSERR);
626: }
1.1 kristaps 627:
1.3 kristaps 628: if (reccur >= *recsz) {
629: *recsz += MANDOC_SLOP;
630: *recs = mandoc_realloc
631: (*recs, *recsz * sizeof(recno_t));
632: }
1.1 kristaps 633:
1.3 kristaps 634: (*recs)[(int)reccur] = *maxrec;
635: reccur++;
636: }
637: (*maxrec)++;
1.1 kristaps 638: }
639:
640: /*
641: * Grow the buffer (if necessary) and copy in a binary string.
642: */
643: static void
644: buf_appendb(struct buf *buf, const void *cp, size_t sz)
645: {
646:
647: /* Overshoot by MANDOC_BUFSZ. */
648:
649: while (buf->len + sz >= buf->size) {
650: buf->size = buf->len + sz + MANDOC_BUFSZ;
651: buf->cp = mandoc_realloc(buf->cp, buf->size);
652: }
653:
654: memcpy(buf->cp + (int)buf->len, cp, sz);
655: buf->len += sz;
656: }
657:
658: /*
659: * Append a nil-terminated string to the buffer.
660: * This can be invoked multiple times.
661: * The buffer string will be nil-terminated.
662: * If invoked multiple times, a space is put between strings.
663: */
664: static void
665: buf_append(struct buf *buf, const char *cp)
666: {
667: size_t sz;
668:
669: if (0 == (sz = strlen(cp)))
670: return;
671:
672: if (buf->len)
673: buf->cp[(int)buf->len - 1] = ' ';
674:
675: buf_appendb(buf, cp, sz + 1);
676: }
677:
678: /*
679: * Recursively add all text from a given node.
680: * This is optimised for general mdoc nodes in this context, which do
681: * not consist of subexpressions and having a recursive call for n->next
682: * would be wasteful.
683: * The "f" variable should be 0 unless called from pmdoc_Nd for the
684: * description buffer, which does not start at the beginning of the
685: * buffer.
686: */
687: static void
688: buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f)
689: {
690:
691: for ( ; n; n = n->next) {
692: if (n->child)
693: buf_appendmdoc(buf, n->child, f);
694:
695: if (MDOC_TEXT == n->type && f) {
696: f = 0;
697: buf_appendb(buf, n->string,
698: strlen(n->string) + 1);
699: } else if (MDOC_TEXT == n->type)
700: buf_append(buf, n->string);
701:
702: }
703: }
704:
705: /* ARGSUSED */
706: static void
707: pmdoc_An(MDOC_ARGS)
708: {
709:
710: if (SEC_AUTHORS != n->sec)
711: return;
712:
713: buf_appendmdoc(buf, n->child, 0);
1.8 ! schwarze 714: hash_put(hash, buf, TYPE_An);
1.1 kristaps 715: }
716:
717: static void
718: hash_reset(DB **db)
719: {
720: DB *hash;
721:
722: if (NULL != (hash = *db))
723: (*hash->close)(hash);
724:
1.5 kristaps 725: *db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL);
1.1 kristaps 726: if (NULL == *db) {
727: perror("hash");
728: exit((int)MANDOCLEVEL_SYSERR);
729: }
730: }
731:
732: /* ARGSUSED */
733: static void
734: pmdoc_Fd(MDOC_ARGS)
735: {
736: const char *start, *end;
737: size_t sz;
738:
739: if (SEC_SYNOPSIS != n->sec)
740: return;
741: if (NULL == (n = n->child) || MDOC_TEXT != n->type)
742: return;
743:
744: /*
745: * Only consider those `Fd' macro fields that begin with an
746: * "inclusion" token (versus, e.g., #define).
747: */
748: if (strcmp("#include", n->string))
749: return;
750:
751: if (NULL == (n = n->next) || MDOC_TEXT != n->type)
752: return;
753:
754: /*
755: * Strip away the enclosing angle brackets and make sure we're
756: * not zero-length.
757: */
758:
759: start = n->string;
760: if ('<' == *start || '"' == *start)
761: start++;
762:
763: if (0 == (sz = strlen(start)))
764: return;
765:
766: end = &start[(int)sz - 1];
767: if ('>' == *end || '"' == *end)
768: end--;
769:
770: assert(end >= start);
771:
772: buf_appendb(buf, start, (size_t)(end - start + 1));
773: buf_appendb(buf, "", 1);
774:
1.8 ! schwarze 775: hash_put(hash, buf, TYPE_In);
1.1 kristaps 776: }
777:
778: /* ARGSUSED */
779: static void
780: pmdoc_Cd(MDOC_ARGS)
781: {
782:
783: if (SEC_SYNOPSIS != n->sec)
784: return;
785:
786: buf_appendmdoc(buf, n->child, 0);
1.8 ! schwarze 787: hash_put(hash, buf, TYPE_Cd);
1.1 kristaps 788: }
789:
790: /* ARGSUSED */
791: static void
792: pmdoc_In(MDOC_ARGS)
793: {
794:
795: if (SEC_SYNOPSIS != n->sec)
796: return;
797: if (NULL == n->child || MDOC_TEXT != n->child->type)
798: return;
799:
800: buf_append(buf, n->child->string);
1.8 ! schwarze 801: hash_put(hash, buf, TYPE_In);
1.1 kristaps 802: }
803:
804: /* ARGSUSED */
805: static void
806: pmdoc_Fn(MDOC_ARGS)
807: {
808: const char *cp;
809:
810: if (SEC_SYNOPSIS != n->sec)
811: return;
812: if (NULL == n->child || MDOC_TEXT != n->child->type)
813: return;
814:
815: /* .Fn "struct type *arg" "foo" */
816:
817: cp = strrchr(n->child->string, ' ');
818: if (NULL == cp)
819: cp = n->child->string;
820:
821: /* Strip away pointer symbol. */
822:
823: while ('*' == *cp)
824: cp++;
825:
826: buf_append(buf, cp);
1.8 ! schwarze 827: hash_put(hash, buf, TYPE_Fn);
1.1 kristaps 828: }
829:
830: /* ARGSUSED */
831: static void
832: pmdoc_St(MDOC_ARGS)
833: {
834:
835: if (SEC_STANDARDS != n->sec)
836: return;
837: if (NULL == n->child || MDOC_TEXT != n->child->type)
838: return;
839:
840: buf_append(buf, n->child->string);
1.8 ! schwarze 841: hash_put(hash, buf, TYPE_St);
1.1 kristaps 842: }
843:
844: /* ARGSUSED */
845: static void
846: pmdoc_Xr(MDOC_ARGS)
847: {
848:
849: if (NULL == (n = n->child))
850: return;
851:
852: buf_appendb(buf, n->string, strlen(n->string));
853:
854: if (NULL != (n = n->next)) {
855: buf_appendb(buf, ".", 1);
856: buf_appendb(buf, n->string, strlen(n->string) + 1);
857: } else
858: buf_appendb(buf, ".", 2);
859:
1.8 ! schwarze 860: hash_put(hash, buf, TYPE_Xr);
1.1 kristaps 861: }
862:
863: /* ARGSUSED */
864: static void
865: pmdoc_Vt(MDOC_ARGS)
866: {
867: const char *start;
868: size_t sz;
869:
870: if (SEC_SYNOPSIS != n->sec)
871: return;
872: if (MDOC_Vt == n->tok && MDOC_BODY != n->type)
873: return;
874: if (NULL == n->last || MDOC_TEXT != n->last->type)
875: return;
876:
877: /*
878: * Strip away leading pointer symbol '*' and trailing ';'.
879: */
880:
881: start = n->last->string;
882:
883: while ('*' == *start)
884: start++;
885:
886: if (0 == (sz = strlen(start)))
887: return;
888:
889: if (';' == start[(int)sz - 1])
890: sz--;
891:
892: if (0 == sz)
893: return;
894:
895: buf_appendb(buf, start, sz);
896: buf_appendb(buf, "", 1);
1.8 ! schwarze 897: hash_put(hash, buf, TYPE_Va);
1.1 kristaps 898: }
899:
900: /* ARGSUSED */
901: static void
902: pmdoc_Fo(MDOC_ARGS)
903: {
904:
905: if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
906: return;
907: if (NULL == n->child || MDOC_TEXT != n->child->type)
908: return;
909:
910: buf_append(buf, n->child->string);
1.8 ! schwarze 911: hash_put(hash, buf, TYPE_Fn);
1.1 kristaps 912: }
913:
914:
915: /* ARGSUSED */
916: static void
917: pmdoc_Nd(MDOC_ARGS)
918: {
919:
920: if (MDOC_BODY != n->type)
921: return;
922:
923: buf_appendmdoc(dbuf, n->child, 1);
924: buf_appendmdoc(buf, n->child, 0);
925:
1.8 ! schwarze 926: hash_put(hash, buf, TYPE_Nd);
1.1 kristaps 927: }
928:
929: /* ARGSUSED */
930: static void
931: pmdoc_Er(MDOC_ARGS)
932: {
933:
934: if (SEC_ERRORS != n->sec)
935: return;
936:
937: buf_appendmdoc(buf, n->child, 0);
1.8 ! schwarze 938: hash_put(hash, buf, TYPE_Er);
1.1 kristaps 939: }
940:
941: /* ARGSUSED */
942: static void
943: pmdoc_Ev(MDOC_ARGS)
944: {
945:
946: if (SEC_ENVIRONMENT != n->sec)
947: return;
948:
949: buf_appendmdoc(buf, n->child, 0);
1.8 ! schwarze 950: hash_put(hash, buf, TYPE_Ev);
1.1 kristaps 951: }
952:
953: /* ARGSUSED */
954: static void
955: pmdoc_Pa(MDOC_ARGS)
956: {
957:
958: if (SEC_FILES != n->sec)
959: return;
960:
961: buf_appendmdoc(buf, n->child, 0);
1.8 ! schwarze 962: hash_put(hash, buf, TYPE_Pa);
1.1 kristaps 963: }
964:
965: /* ARGSUSED */
966: static void
967: pmdoc_Nm(MDOC_ARGS)
968: {
969:
970: if (SEC_NAME == n->sec) {
971: buf_appendmdoc(buf, n->child, 0);
1.8 ! schwarze 972: hash_put(hash, buf, TYPE_Nm);
1.1 kristaps 973: return;
974: } else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
975: return;
976:
977: if (NULL == n->child)
978: buf_append(buf, m->name);
979:
980: buf_appendmdoc(buf, n->child, 0);
1.8 ! schwarze 981: hash_put(hash, buf, TYPE_Nm);
1.1 kristaps 982: }
983:
984: static void
985: hash_put(DB *db, const struct buf *buf, int mask)
986: {
987: DBT key, val;
988: int rc;
989:
990: if (buf->len < 2)
991: return;
992:
993: key.data = buf->cp;
994: key.size = buf->len;
995:
996: if ((rc = (*db->get)(db, &key, &val, 0)) < 0) {
997: perror("hash");
998: exit((int)MANDOCLEVEL_SYSERR);
999: } else if (0 == rc)
1000: mask |= *(int *)val.data;
1001:
1002: val.data = &mask;
1003: val.size = sizeof(int);
1004:
1005: if ((rc = (*db->put)(db, &key, &val, 0)) < 0) {
1006: perror("hash");
1007: exit((int)MANDOCLEVEL_SYSERR);
1008: }
1009: }
1010:
1011: static void
1012: dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
1013: {
1014:
1015: assert(key->size);
1016: assert(val->size);
1017:
1018: if (0 == (*db->put)(db, key, val, 0))
1019: return;
1020:
1021: perror(dbn);
1022: exit((int)MANDOCLEVEL_SYSERR);
1023: /* NOTREACHED */
1024: }
1025:
1026: /*
1027: * Call out to per-macro handlers after clearing the persistent database
1028: * key. If the macro sets the database key, flush it to the database.
1029: */
1030: static void
1031: pmdoc_node(MDOC_ARGS)
1032: {
1033:
1034: if (NULL == n)
1035: return;
1036:
1037: switch (n->type) {
1038: case (MDOC_HEAD):
1039: /* FALLTHROUGH */
1040: case (MDOC_BODY):
1041: /* FALLTHROUGH */
1042: case (MDOC_TAIL):
1043: /* FALLTHROUGH */
1044: case (MDOC_BLOCK):
1045: /* FALLTHROUGH */
1046: case (MDOC_ELEM):
1047: if (NULL == mdocs[n->tok])
1048: break;
1049:
1050: buf->len = 0;
1051: (*mdocs[n->tok])(hash, buf, dbuf, n, m);
1052: break;
1053: default:
1054: break;
1055: }
1056:
1057: pmdoc_node(hash, buf, dbuf, n->child, m);
1058: pmdoc_node(hash, buf, dbuf, n->next, m);
1059: }
1060:
1061: static int
1062: pman_node(MAN_ARGS)
1063: {
1064: const struct man_node *head, *body;
1065: const char *start, *sv;
1066: size_t sz;
1067:
1068: if (NULL == n)
1069: return(0);
1070:
1071: /*
1072: * We're only searching for one thing: the first text child in
1073: * the BODY of a NAME section. Since we don't keep track of
1074: * sections in -man, run some hoops to find out whether we're in
1075: * the correct section or not.
1076: */
1077:
1078: if (MAN_BODY == n->type && MAN_SH == n->tok) {
1079: body = n;
1080: assert(body->parent);
1081: if (NULL != (head = body->parent->head) &&
1082: 1 == head->nchild &&
1083: NULL != (head = (head->child)) &&
1084: MAN_TEXT == head->type &&
1085: 0 == strcmp(head->string, "NAME") &&
1086: NULL != (body = body->child) &&
1087: MAN_TEXT == body->type) {
1088:
1089: assert(body->string);
1090: start = sv = body->string;
1091:
1092: /*
1093: * Go through a special heuristic dance here.
1094: * This is why -man manuals are great!
1095: * (I'm being sarcastic: my eyes are bleeding.)
1096: * Conventionally, one or more manual names are
1097: * comma-specified prior to a whitespace, then a
1098: * dash, then a description. Try to puzzle out
1099: * the name parts here.
1100: */
1101:
1102: for ( ;; ) {
1103: sz = strcspn(start, " ,");
1104: if ('\0' == start[(int)sz])
1105: break;
1106:
1107: buf->len = 0;
1108: buf_appendb(buf, start, sz);
1109: buf_appendb(buf, "", 1);
1110:
1.8 ! schwarze 1111: hash_put(hash, buf, TYPE_Nm);
1.1 kristaps 1112:
1113: if (' ' == start[(int)sz]) {
1114: start += (int)sz + 1;
1115: break;
1116: }
1117:
1118: assert(',' == start[(int)sz]);
1119: start += (int)sz + 1;
1120: while (' ' == *start)
1121: start++;
1122: }
1123:
1124: buf->len = 0;
1125:
1126: if (sv == start) {
1127: buf_append(buf, start);
1128: return(1);
1129: }
1130:
1131: while (' ' == *start)
1132: start++;
1133:
1134: if (0 == strncmp(start, "-", 1))
1135: start += 1;
1136: else if (0 == strncmp(start, "\\-", 2))
1137: start += 2;
1138: else if (0 == strncmp(start, "\\(en", 4))
1139: start += 4;
1140: else if (0 == strncmp(start, "\\(em", 4))
1141: start += 4;
1142:
1143: while (' ' == *start)
1144: start++;
1145:
1146: sz = strlen(start) + 1;
1147: buf_appendb(dbuf, start, sz);
1148: buf_appendb(buf, start, sz);
1149:
1.8 ! schwarze 1150: hash_put(hash, buf, TYPE_Nd);
1.1 kristaps 1151: }
1152: }
1153:
1.7 schwarze 1154: for (n = n->child; n; n = n->next)
1155: if (pman_node(hash, buf, dbuf, n))
1156: return(1);
1.1 kristaps 1157:
1158: return(0);
1159: }
1160:
1.5 kristaps 1161: static void
1162: ofile_argbuild(char *argv[], int argc, int verb, struct of **of)
1163: {
1164: int i;
1165: struct of *nof;
1166:
1167: for (i = 0; i < argc; i++) {
1168: nof = mandoc_calloc(1, sizeof(struct of));
1169: nof->fname = strdup(argv[i]);
1170: if (verb > 2)
1171: printf("%s: Scheduling\n", argv[i]);
1172: if (NULL == *of) {
1173: *of = nof;
1174: (*of)->first = nof;
1175: } else {
1176: nof->first = (*of)->first;
1177: (*of)->next = nof;
1178: *of = nof;
1179: }
1180: }
1181: }
1182:
1.4 kristaps 1183: /*
1184: * Recursively build up a list of files to parse.
1185: * We use this instead of ftw() and so on because I don't want global
1186: * variables hanging around.
1187: * This ignores the mandoc.db and mandoc.index files, but assumes that
1188: * everything else is a manual.
1189: * Pass in a pointer to a NULL structure for the first invocation.
1190: */
1191: static int
1.5 kristaps 1192: ofile_dirbuild(const char *dir, int verb, struct of **of)
1.4 kristaps 1193: {
1.5 kristaps 1194: char buf[MAXPATHLEN];
1195: size_t sz;
1.4 kristaps 1196: DIR *d;
1197: const char *fn;
1198: struct of *nof;
1199: struct dirent *dp;
1200:
1201: if (NULL == (d = opendir(dir))) {
1202: perror(dir);
1203: return(0);
1204: }
1205:
1206: while (NULL != (dp = readdir(d))) {
1207: fn = dp->d_name;
1208: if (DT_DIR == dp->d_type) {
1.5 kristaps 1209: if (0 == strcmp(".", fn))
1210: continue;
1211: if (0 == strcmp("..", fn))
1212: continue;
1213:
1214: buf[0] = '\0';
1215: strlcat(buf, dir, MAXPATHLEN);
1216: strlcat(buf, "/", MAXPATHLEN);
1217: sz = strlcat(buf, fn, MAXPATHLEN);
1218:
1219: if (sz < MAXPATHLEN) {
1220: if ( ! ofile_dirbuild(buf, verb, of))
1.4 kristaps 1221: return(0);
1.5 kristaps 1222: continue;
1223: } else if (sz < MAXPATHLEN)
1224: continue;
1225:
1226: fprintf(stderr, "%s: Path too long\n", dir);
1227: return(0);
1228: }
1229: if (DT_REG != dp->d_type)
1.4 kristaps 1230: continue;
1231:
1232: if (0 == strcmp(MANDOC_DB, fn) ||
1233: 0 == strcmp(MANDOC_IDX, fn))
1234: continue;
1235:
1.5 kristaps 1236: buf[0] = '\0';
1237: strlcat(buf, dir, MAXPATHLEN);
1238: strlcat(buf, "/", MAXPATHLEN);
1.6 schwarze 1239: sz = strlcat(buf, fn, MAXPATHLEN);
1.5 kristaps 1240: if (sz >= MAXPATHLEN) {
1241: fprintf(stderr, "%s: Path too long\n", dir);
1242: return(0);
1243: }
1244:
1.4 kristaps 1245: nof = mandoc_calloc(1, sizeof(struct of));
1.5 kristaps 1246: nof->fname = mandoc_strdup(buf);
1247:
1248: if (verb > 2)
1249: printf("%s: Scheduling\n", buf);
1.4 kristaps 1250:
1251: if (NULL == *of) {
1252: *of = nof;
1253: (*of)->first = nof;
1254: } else {
1.5 kristaps 1255: nof->first = (*of)->first;
1.4 kristaps 1256: (*of)->next = nof;
1257: *of = nof;
1258: }
1259: }
1260:
1.7 schwarze 1261: closedir(d);
1.4 kristaps 1262: return(1);
1263: }
1264:
1265: static void
1266: ofile_free(struct of *of)
1267: {
1268: struct of *nof;
1269:
1270: while (of) {
1271: nof = of->next;
1272: free(of->fname);
1273: free(of);
1274: of = nof;
1275: }
1276: }
1277:
1.1 kristaps 1278: static void
1279: usage(void)
1280: {
1281:
1.5 kristaps 1282: fprintf(stderr, "usage: %s [-v] "
1283: "[-d dir [files...] |"
1284: " -u dir [files...] |"
1285: " dir...]\n", progname);
1.1 kristaps 1286: }
CVSweb