Annotation of mandoc/mandoc-db.c, Revision 1.4
1.4 ! kristaps 1: /* $Id: mandoc-db.c,v 1.3 2011/04/03 14:18:29 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
1.2 kristaps 17: #ifdef HAVE_CONFIG_H
18: #include "config.h"
19: #endif
20:
1.1 kristaps 21: #include <sys/param.h>
22:
23: #include <assert.h>
24: #ifdef __linux__
25: # include <db_185.h>
26: #else
27: # include <db.h>
28: #endif
29: #include <fcntl.h>
30: #include <getopt.h>
31: #include <stdio.h>
32: #include <stdint.h>
33: #include <stdlib.h>
34: #include <string.h>
35:
36: #include "man.h"
37: #include "mdoc.h"
38: #include "mandoc.h"
39:
/*
 * Output file names (appended to the -d prefix) and shared tunables.
 */
#define	MANDOC_DB	 "mandoc.db"	/* keyword btree database */
#define	MANDOC_IDX	 "mandoc.index"	/* recno index of file names */
#define	MANDOC_BUFSZ	 BUFSIZ		/* key buffer size/growth step */
/*
 * Flags handed to dbopen().  Parenthesised so that the expansion keeps
 * its meaning when used inside a larger expression.
 */
#define	MANDOC_FLAGS	 (O_CREAT|O_TRUNC|O_RDWR)
1.1 kristaps 44:
/*
 * Keyword classification.  The per-macro handlers copy one of these
 * into the first four bytes of the database value.
 */
enum	type {
	MANDOC_NONE = 0,	/* no classification */
	MANDOC_NAME = 1,	/* `Nm' in the NAME section */
	MANDOC_FUNCTION = 2,	/* `Fn' or `Fo' in the SYNOPSIS */
	MANDOC_UTILITY = 3,	/* `Nm' in the SYNOPSIS */
	MANDOC_INCLUDES = 4,	/* `In' or `Fd #include' in the SYNOPSIS */
	MANDOC_VARIABLE = 5	/* `Va' or `Vt' in the SYNOPSIS */
};
53:
/*
 * Parameter list shared by pmdoc_node() and all per-macro handlers:
 * the keyword database and its file name (for error messages), the
 * reusable key with its allocated size, the value, and the node.
 */
#define	MDOC_ARGS	DB *db, const char *dbn, \
			DBT *key, size_t *ksz, DBT *val, \
			const struct mdoc_node *n
59:
60: static void dbt_append(DBT *, size_t *, const char *);
61: static void dbt_appendb(DBT *, size_t *,
62: const void *, size_t);
63: static void dbt_init(DBT *, size_t *);
64: static void usage(void);
65: static void pmdoc(DB *, const char *,
1.3 kristaps 66: DBT *, size_t *, DBT *,
1.1 kristaps 67: const char *, struct mdoc *);
68: static void pmdoc_node(MDOC_ARGS);
69: static void pmdoc_Fd(MDOC_ARGS);
70: static void pmdoc_In(MDOC_ARGS);
71: static void pmdoc_Fn(MDOC_ARGS);
72: static void pmdoc_Fo(MDOC_ARGS);
73: static void pmdoc_Nm(MDOC_ARGS);
74: static void pmdoc_Vt(MDOC_ARGS);
75:
76: typedef void (*pmdoc_nf)(MDOC_ARGS);
77:
78: static const char *progname;
79:
80: static const pmdoc_nf mdocs[MDOC_MAX] = {
81: NULL, /* Ap */
82: NULL, /* Dd */
83: NULL, /* Dt */
84: NULL, /* Os */
85: NULL, /* Sh */
86: NULL, /* Ss */
87: NULL, /* Pp */
88: NULL, /* D1 */
89: NULL, /* Dl */
90: NULL, /* Bd */
91: NULL, /* Ed */
92: NULL, /* Bl */
93: NULL, /* El */
94: NULL, /* It */
95: NULL, /* Ad */
96: NULL, /* An */
97: NULL, /* Ar */
98: NULL, /* Cd */
99: NULL, /* Cm */
100: NULL, /* Dv */
101: NULL, /* Er */
102: NULL, /* Ev */
103: NULL, /* Ex */
104: NULL, /* Fa */
105: pmdoc_Fd, /* Fd */
106: NULL, /* Fl */
107: pmdoc_Fn, /* Fn */
108: NULL, /* Ft */
109: NULL, /* Ic */
110: pmdoc_In, /* In */
111: NULL, /* Li */
112: NULL, /* Nd */
113: pmdoc_Nm, /* Nm */
114: NULL, /* Op */
115: NULL, /* Ot */
116: NULL, /* Pa */
117: NULL, /* Rv */
118: NULL, /* St */
119: pmdoc_Vt, /* Va */
120: pmdoc_Vt, /* Vt */
121: NULL, /* Xr */
122: NULL, /* %A */
123: NULL, /* %B */
124: NULL, /* %D */
125: NULL, /* %I */
126: NULL, /* %J */
127: NULL, /* %N */
128: NULL, /* %O */
129: NULL, /* %P */
130: NULL, /* %R */
131: NULL, /* %T */
132: NULL, /* %V */
133: NULL, /* Ac */
134: NULL, /* Ao */
135: NULL, /* Aq */
136: NULL, /* At */
137: NULL, /* Bc */
138: NULL, /* Bf */
139: NULL, /* Bo */
140: NULL, /* Bq */
141: NULL, /* Bsx */
142: NULL, /* Bx */
143: NULL, /* Db */
144: NULL, /* Dc */
145: NULL, /* Do */
146: NULL, /* Dq */
147: NULL, /* Ec */
148: NULL, /* Ef */
149: NULL, /* Em */
150: NULL, /* Eo */
151: NULL, /* Fx */
152: NULL, /* Ms */
153: NULL, /* No */
154: NULL, /* Ns */
155: NULL, /* Nx */
156: NULL, /* Ox */
157: NULL, /* Pc */
158: NULL, /* Pf */
159: NULL, /* Po */
160: NULL, /* Pq */
161: NULL, /* Qc */
162: NULL, /* Ql */
163: NULL, /* Qo */
164: NULL, /* Qq */
165: NULL, /* Re */
166: NULL, /* Rs */
167: NULL, /* Sc */
168: NULL, /* So */
169: NULL, /* Sq */
170: NULL, /* Sm */
171: NULL, /* Sx */
172: NULL, /* Sy */
173: NULL, /* Tn */
174: NULL, /* Ux */
175: NULL, /* Xc */
176: NULL, /* Xo */
177: pmdoc_Fo, /* Fo */
178: NULL, /* Fc */
179: NULL, /* Oo */
180: NULL, /* Oc */
181: NULL, /* Bk */
182: NULL, /* Ek */
183: NULL, /* Bt */
184: NULL, /* Hf */
185: NULL, /* Fr */
186: NULL, /* Ud */
187: NULL, /* Lb */
188: NULL, /* Lp */
189: NULL, /* Lk */
190: NULL, /* Mt */
191: NULL, /* Brq */
192: NULL, /* Bro */
193: NULL, /* Brc */
194: NULL, /* %C */
195: NULL, /* Es */
196: NULL, /* En */
197: NULL, /* Dx */
198: NULL, /* %Q */
199: NULL, /* br */
200: NULL, /* sp */
201: NULL, /* %U */
202: NULL, /* Ta */
203: };
204:
205: int
206: main(int argc, char *argv[])
207: {
1.2 kristaps 208: struct mparse *mp; /* parse sequence */
209: struct mdoc *mdoc; /* resulting mdoc */
1.3 kristaps 210: char *fn;
211: const char *dir; /* result dir (default: cwd) */
212: char ibuf[MAXPATHLEN], /* index fname */
213: ibbuf[MAXPATHLEN], /* index backup fname */
214: fbuf[MAXPATHLEN], /* btree fname */
1.2 kristaps 215: fbbuf[MAXPATHLEN]; /* btree backup fname */
1.1 kristaps 216: int c;
1.3 kristaps 217: DB *index, /* index database */
218: *db; /* keyword database */
219: DBT rkey, rval, /* recno entries */
220: key, val; /* persistent keyword entries */
221: size_t ksz; /* entry buffer size */
222: char vbuf[8];
1.2 kristaps 223: BTREEINFO info; /* btree configuration */
1.3 kristaps 224: recno_t rec;
1.1 kristaps 225: extern int optind;
226: extern char *optarg;
227:
228: progname = strrchr(argv[0], '/');
229: if (progname == NULL)
230: progname = argv[0];
231: else
232: ++progname;
233:
1.3 kristaps 234: dir = "";
1.2 kristaps 235:
1.4 ! kristaps 236: while (-1 != (c = getopt(argc, argv, "d:")))
1.1 kristaps 237: switch (c) {
1.2 kristaps 238: case ('d'):
239: dir = optarg;
1.1 kristaps 240: break;
241: default:
242: usage();
243: return((int)MANDOCLEVEL_BADARG);
244: }
245:
246: argc -= optind;
247: argv += optind;
248:
249: /*
1.3 kristaps 250: * Set up temporary file-names into which we're going to write
251: * all of our data (both for the index and database). These
252: * will be securely renamed to the real file-names after we've
253: * written all of our data.
1.1 kristaps 254: */
255:
1.3 kristaps 256: ibuf[0] = ibuf[MAXPATHLEN - 2] =
257: ibbuf[0] = ibbuf[MAXPATHLEN - 2] =
258: fbuf[0] = fbuf[MAXPATHLEN - 2] =
259: fbbuf[0] = fbbuf[MAXPATHLEN - 2] = '\0';
1.2 kristaps 260:
261: strlcat(fbuf, dir, MAXPATHLEN);
262: strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
1.3 kristaps 263:
1.2 kristaps 264: strlcat(fbbuf, fbuf, MAXPATHLEN);
265: strlcat(fbbuf, "~", MAXPATHLEN);
266:
1.3 kristaps 267: strlcat(ibuf, dir, MAXPATHLEN);
268: strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
269:
270: strlcat(ibbuf, ibuf, MAXPATHLEN);
271: strlcat(ibbuf, "~", MAXPATHLEN);
272:
1.2 kristaps 273: if ('\0' != fbuf[MAXPATHLEN - 2] ||
1.3 kristaps 274: '\0' != fbbuf[MAXPATHLEN - 2] ||
275: '\0' != ibuf[MAXPATHLEN - 2] ||
276: '\0' != ibbuf[MAXPATHLEN - 2]) {
277: fprintf(stderr, "%s: Path too long\n", progname);
1.1 kristaps 278: exit((int)MANDOCLEVEL_SYSERR);
279: }
280:
281: /*
1.3 kristaps 282: * For the keyword database, open a BTREE database that allows
283: * duplicates. For the index database, use a standard RECNO
284: * database type.
1.1 kristaps 285: */
286:
287: memset(&info, 0, sizeof(BTREEINFO));
288: info.flags = R_DUP;
1.3 kristaps 289: db = dbopen(fbbuf, MANDOC_FLAGS, 0644, DB_BTREE, &info);
1.1 kristaps 290:
291: if (NULL == db) {
1.2 kristaps 292: perror(fbbuf);
1.1 kristaps 293: exit((int)MANDOCLEVEL_SYSERR);
294: }
295:
1.3 kristaps 296: index = dbopen(ibbuf, MANDOC_FLAGS, 0644, DB_RECNO, NULL);
1.1 kristaps 297:
1.3 kristaps 298: if (NULL == db) {
299: perror(ibbuf);
300: (*db->close)(db);
301: exit((int)MANDOCLEVEL_SYSERR);
302: }
1.1 kristaps 303:
304: /*
305: * Try parsing the manuals given on the command line. If we
306: * totally fail, then just keep on going. Take resulting trees
307: * and push them down into the database code.
1.3 kristaps 308: * Use the auto-parser and don't report any errors.
1.1 kristaps 309: */
310:
1.3 kristaps 311: mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
312:
1.1 kristaps 313: memset(&key, 0, sizeof(DBT));
314: memset(&val, 0, sizeof(DBT));
1.3 kristaps 315: memset(&rkey, 0, sizeof(DBT));
316: memset(&rval, 0, sizeof(DBT));
317:
318: val.size = sizeof(vbuf);
319: val.data = vbuf;
320: rkey.size = sizeof(recno_t);
321:
322: rec = 1;
323: ksz = 0;
1.1 kristaps 324:
325: while (NULL != (fn = *argv++)) {
326: mparse_reset(mp);
1.3 kristaps 327:
1.4 ! kristaps 328: if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) {
! 329: fprintf(stderr, "%s: Parse failure\n", fn);
1.1 kristaps 330: continue;
1.4 ! kristaps 331: }
1.3 kristaps 332:
1.2 kristaps 333: mparse_result(mp, &mdoc, NULL);
1.3 kristaps 334: if (NULL == mdoc)
335: continue;
336:
337: rkey.data = &rec;
338: rval.data = fn;
339: rval.size = strlen(fn) + 1;
340:
341: if (-1 == (*index->put)(index, &rkey, &rval, 0)) {
342: perror(ibbuf);
343: break;
344: }
345:
346: memset(val.data, 0, sizeof(uint32_t));
347: memcpy(val.data + 4, &rec, sizeof(uint32_t));
348:
349: pmdoc(db, fbbuf, &key, &ksz, &val, fn, mdoc);
350: rec++;
1.1 kristaps 351: }
352:
353: (*db->close)(db);
1.3 kristaps 354: (*index->close)(index);
355:
1.1 kristaps 356: mparse_free(mp);
357:
358: free(key.data);
359:
360: /* Atomically replace the file with our temporary one. */
361:
1.2 kristaps 362: if (-1 == rename(fbbuf, fbuf))
363: perror(fbuf);
1.3 kristaps 364: if (-1 == rename(ibbuf, ibuf))
365: perror(fbuf);
1.1 kristaps 366:
367: return((int)MANDOCLEVEL_OK);
368: }
369:
370: /*
371: * Initialise the stored database key whose data buffer is shared
372: * between uses (as the key must sometimes be constructed from an array
373: * of
374: */
375: static void
376: dbt_init(DBT *key, size_t *ksz)
377: {
378:
379: if (0 == *ksz) {
380: assert(0 == key->size);
381: assert(NULL == key->data);
382: key->data = mandoc_malloc(MANDOC_BUFSZ);
383: *ksz = MANDOC_BUFSZ;
384: }
385:
386: key->size = 0;
387: }
388:
389: /*
390: * Append a binary value to a database entry. This can be invoked
391: * multiple times; the buffer is automatically resized.
392: */
393: static void
394: dbt_appendb(DBT *key, size_t *ksz, const void *cp, size_t sz)
395: {
396:
397: assert(key->data);
398:
399: /* Overshoot by MANDOC_BUFSZ. */
400:
401: while (key->size + sz >= *ksz) {
402: *ksz = key->size + sz + MANDOC_BUFSZ;
403: key->data = mandoc_realloc(key->data, *ksz);
404: }
405:
406: memcpy(key->data + (int)key->size, cp, sz);
407: key->size += sz;
408: }
409:
410: /*
411: * Append a nil-terminated string to the database entry. This can be
412: * invoked multiple times. The database entry will be nil-terminated as
413: * well; if invoked multiple times, a space is put between strings.
414: */
415: static void
416: dbt_append(DBT *key, size_t *ksz, const char *cp)
417: {
418: size_t sz;
419:
420: if (0 == (sz = strlen(cp)))
421: return;
422:
1.3 kristaps 423: assert(key->data);
1.1 kristaps 424:
425: if (key->size)
426: ((char *)key->data)[(int)key->size - 1] = ' ';
427:
1.3 kristaps 428: dbt_appendb(key, ksz, cp, sz + 1);
1.1 kristaps 429: }
430:
431: /* ARGSUSED */
432: static void
433: pmdoc_Fd(MDOC_ARGS)
434: {
435: uint32_t fl;
436: const char *start, *end;
437: size_t sz;
438: char nil;
439:
440: if (SEC_SYNOPSIS != n->sec)
441: return;
442: if (NULL == (n = n->child) || MDOC_TEXT != n->type)
443: return;
1.4 ! kristaps 444:
! 445: /*
! 446: * Only consider those `Fd' macro fields that begin with an
! 447: * "inclusion" token (versus, e.g., #define).
! 448: */
1.1 kristaps 449: if (strcmp("#include", n->string))
450: return;
1.4 ! kristaps 451:
1.1 kristaps 452: if (NULL == (n = n->next) || MDOC_TEXT != n->type)
453: return;
454:
1.4 ! kristaps 455: /*
! 456: * Strip away the enclosing angle brackets and make sure we're
! 457: * not zero-length.
! 458: */
! 459:
1.1 kristaps 460: start = n->string;
461: if ('<' == *start)
462: start++;
463:
464: if (0 == (sz = strlen(start)))
465: return;
466:
467: end = &start[(int)sz - 1];
468: if ('>' == *end)
469: end--;
470:
471: nil = '\0';
472: dbt_appendb(key, ksz, start, end - start + 1);
473: dbt_appendb(key, ksz, &nil, 1);
474:
475: fl = MANDOC_INCLUDES;
476: memcpy(val->data, &fl, 4);
477: }
478:
479: /* ARGSUSED */
480: static void
481: pmdoc_In(MDOC_ARGS)
482: {
483: uint32_t fl;
484:
485: if (SEC_SYNOPSIS != n->sec)
486: return;
487: if (NULL == n->child || MDOC_TEXT != n->child->type)
488: return;
489:
490: dbt_append(key, ksz, n->child->string);
491: fl = MANDOC_INCLUDES;
492: memcpy(val->data, &fl, 4);
493: }
494:
495: /* ARGSUSED */
496: static void
497: pmdoc_Fn(MDOC_ARGS)
498: {
499: uint32_t fl;
500: const char *cp;
501:
502: if (SEC_SYNOPSIS != n->sec)
503: return;
504: if (NULL == n->child || MDOC_TEXT != n->child->type)
505: return;
506:
507: /* .Fn "struct type *arg" "foo" */
508:
509: cp = strrchr(n->child->string, ' ');
510: if (NULL == cp)
511: cp = n->child->string;
512:
1.4 ! kristaps 513: /* Strip away pointer symbol. */
1.1 kristaps 514:
515: while ('*' == *cp)
516: cp++;
517:
518: dbt_append(key, ksz, cp);
519: fl = MANDOC_FUNCTION;
520: memcpy(val->data, &fl, 4);
521: }
522:
523: /* ARGSUSED */
524: static void
525: pmdoc_Vt(MDOC_ARGS)
526: {
527: uint32_t fl;
528: const char *start, *end;
529: size_t sz;
530: char nil;
531:
532: if (SEC_SYNOPSIS != n->sec)
533: return;
534: if (MDOC_Vt == n->tok && MDOC_BODY != n->type)
535: return;
536: if (NULL == n->child || MDOC_TEXT != n->child->type)
537: return;
538:
539: /*
1.4 ! kristaps 540: * Strip away leading pointer symbol '*' and trailing ';'.
1.1 kristaps 541: */
542:
543: start = n->last->string;
544:
545: while ('*' == *start)
546: start++;
547:
548: if (0 == (sz = strlen(start)))
549: return;
550:
551: end = &start[sz - 1];
552: while (end > start && ';' == *end)
553: end--;
554:
555: if (end == start)
556: return;
557:
558: nil = '\0';
559: dbt_appendb(key, ksz, start, end - start + 1);
560: dbt_appendb(key, ksz, &nil, 1);
561: fl = MANDOC_VARIABLE;
562: memcpy(val->data, &fl, 4);
563: }
564:
565: /* ARGSUSED */
566: static void
567: pmdoc_Fo(MDOC_ARGS)
568: {
569: uint32_t fl;
570:
571: if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
572: return;
573: if (NULL == n->child || MDOC_TEXT != n->child->type)
574: return;
575:
576: dbt_append(key, ksz, n->child->string);
577: fl = MANDOC_FUNCTION;
578: memcpy(val->data, &fl, 4);
579: }
580:
581: /* ARGSUSED */
582: static void
583: pmdoc_Nm(MDOC_ARGS)
584: {
585: uint32_t fl;
586:
587: if (SEC_NAME == n->sec) {
588: for (n = n->child; n; n = n->next) {
589: if (MDOC_TEXT != n->type)
590: continue;
591: dbt_append(key, ksz, n->string);
592: }
593: fl = MANDOC_NAME;
594: memcpy(val->data, &fl, 4);
595: return;
596: } else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
597: return;
598:
599: for (n = n->child; n; n = n->next) {
600: if (MDOC_TEXT != n->type)
601: continue;
602: dbt_append(key, ksz, n->string);
603: }
604:
605: fl = MANDOC_UTILITY;
606: memcpy(val->data, &fl, 4);
607: }
608:
609: /*
610: * Call out to per-macro handlers after clearing the persistent database
611: * key. If the macro sets the database key, flush it to the database.
612: */
613: static void
614: pmdoc_node(MDOC_ARGS)
615: {
616:
617: if (NULL == n)
618: return;
619:
620: switch (n->type) {
621: case (MDOC_HEAD):
622: /* FALLTHROUGH */
623: case (MDOC_BODY):
624: /* FALLTHROUGH */
625: case (MDOC_TAIL):
626: /* FALLTHROUGH */
627: case (MDOC_BLOCK):
628: /* FALLTHROUGH */
629: case (MDOC_ELEM):
630: if (NULL == mdocs[n->tok])
631: break;
632:
633: dbt_init(key, ksz);
634: (*mdocs[n->tok])(db, dbn, key, ksz, val, n);
635:
636: if (0 == key->size)
637: break;
638: if (0 == (*db->put)(db, key, val, 0))
639: break;
640:
641: perror(dbn);
642: exit((int)MANDOCLEVEL_SYSERR);
643: /* NOTREACHED */
644: default:
645: break;
646: }
647:
648: pmdoc_node(db, dbn, key, ksz, val, n->child);
649: pmdoc_node(db, dbn, key, ksz, val, n->next);
650: }
651:
652: static void
653: pmdoc(DB *db, const char *dbn,
1.3 kristaps 654: DBT *key, size_t *ksz, DBT *val,
1.1 kristaps 655: const char *path, struct mdoc *m)
656: {
657:
658: pmdoc_node(db, dbn, key, ksz, val, mdoc_node(m));
659: }
660:
661: static void
662: usage(void)
663: {
664:
665: fprintf(stderr, "usage: %s "
1.2 kristaps 666: "[-d path] "
1.1 kristaps 667: "[file...]\n",
668: progname);
669: }
CVSweb