Annotation of mandoc/dba.c, Revision 1.9
/* $Id: dba.c,v 1.8 2016/08/17 20:46:56 schwarze Exp $ */
/*
 * Copyright (c) 2016, 2017 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Allocation-based version of the mandoc database, for read-write access.
 * The interface is defined in "dba.h".
 */
1.5 schwarze 20: #include "config.h"
21:
1.1 schwarze 22: #include <sys/types.h>
1.5 schwarze 23: #if HAVE_ENDIAN
1.4 schwarze 24: #include <endian.h>
1.5 schwarze 25: #elif HAVE_SYS_ENDIAN
26: #include <sys/endian.h>
27: #elif HAVE_NTOHL
28: #include <arpa/inet.h>
29: #endif
1.1 schwarze 30: #include <errno.h>
1.9 ! schwarze 31: #include <stddef.h>
1.1 schwarze 32: #include <stdint.h>
33: #include <stdlib.h>
34: #include <string.h>
35: #include <unistd.h>
36:
37: #include "mandoc_aux.h"
1.9 ! schwarze 38: #include "mandoc_ohash.h"
1.1 schwarze 39: #include "mansearch.h"
40: #include "dba_write.h"
41: #include "dba_array.h"
42: #include "dba.h"
43:
/*
 * One value of one macro key, as stored in the per-macro hash tables.
 * The value string is kept inline at the end of the structure; the hash
 * tables are initialized with the offset of that member.
 */
struct macro_entry {
	struct dba_array	*pages;		/* pages added for this value */
	char			 value[];	/* NUL-terminated value string */
};
! 48:
/* File-local helpers for the pages table. */
static void	*prepend(const char *instr, char inbyte);
static void	 dba_pages_write(struct dba_array *pages);
static int	 compare_names(const void *vp1, const void *vp2);
static int	 compare_strings(const void *vp1, const void *vp2);

/* File-local helpers for the macro tables. */
static struct macro_entry
		*get_macro_entry(struct ohash *macro, const char *value,
		    int32_t np);
static void	 dba_macros_write(struct dba_array *macros);
static void	 dba_macro_write(struct ohash *macro);
static int	 compare_entries(const void *vp1, const void *vp2);
1.1 schwarze 59:
60:
61: /*** top-level functions **********************************************/
62:
63: struct dba *
64: dba_new(int32_t npages)
65: {
66: struct dba *dba;
1.9 ! schwarze 67: struct ohash *macro;
1.1 schwarze 68: int32_t im;
69:
70: dba = mandoc_malloc(sizeof(*dba));
71: dba->pages = dba_array_new(npages, DBA_GROW);
72: dba->macros = dba_array_new(MACRO_MAX, 0);
1.9 ! schwarze 73: for (im = 0; im < MACRO_MAX; im++) {
! 74: macro = mandoc_malloc(sizeof(*macro));
! 75: mandoc_ohash_init(macro, 4,
! 76: offsetof(struct macro_entry, value));
! 77: dba_array_set(dba->macros, im, macro);
! 78: }
1.1 schwarze 79: return dba;
80: }
81:
82: void
83: dba_free(struct dba *dba)
84: {
1.9 ! schwarze 85: struct dba_array *page;
! 86: struct ohash *macro;
! 87: struct macro_entry *entry;
! 88: unsigned int slot;
1.1 schwarze 89:
90: dba_array_FOREACH(dba->macros, macro) {
1.9 ! schwarze 91: for (entry = ohash_first(macro, &slot); entry != NULL;
! 92: entry = ohash_next(macro, &slot)) {
! 93: dba_array_free(entry->pages);
! 94: free(entry);
1.1 schwarze 95: }
1.9 ! schwarze 96: ohash_delete(macro);
! 97: free(macro);
1.1 schwarze 98: }
99: dba_array_free(dba->macros);
100:
101: dba_array_undel(dba->pages);
102: dba_array_FOREACH(dba->pages, page) {
103: dba_array_free(dba_array_get(page, DBP_NAME));
104: dba_array_free(dba_array_get(page, DBP_SECT));
105: dba_array_free(dba_array_get(page, DBP_ARCH));
106: free(dba_array_get(page, DBP_DESC));
107: dba_array_free(dba_array_get(page, DBP_FILE));
108: dba_array_free(page);
109: }
110: dba_array_free(dba->pages);
111:
112: free(dba);
113: }
114:
115: /*
116: * Write the complete mandoc database to disk; the format is:
117: * - One integer each for magic and version.
118: * - One pointer each to the macros table and to the final magic.
119: * - The pages table.
120: * - The macros table.
121: * - And at the very end, the magic integer again.
122: */
123: int
124: dba_write(const char *fname, struct dba *dba)
125: {
126: int save_errno;
127: int32_t pos_end, pos_macros, pos_macros_ptr;
128:
129: if (dba_open(fname) == -1)
130: return -1;
131: dba_int_write(MANDOCDB_MAGIC);
132: dba_int_write(MANDOCDB_VERSION);
133: pos_macros_ptr = dba_skip(1, 2);
134: dba_pages_write(dba->pages);
135: pos_macros = dba_tell();
136: dba_macros_write(dba->macros);
137: pos_end = dba_tell();
138: dba_int_write(MANDOCDB_MAGIC);
139: dba_seek(pos_macros_ptr);
140: dba_int_write(pos_macros);
141: dba_int_write(pos_end);
142: if (dba_close() == -1) {
143: save_errno = errno;
144: unlink(fname);
145: errno = save_errno;
146: return -1;
147: }
148: return 0;
149: }
150:
151:
152: /*** functions for handling pages *************************************/
153:
154: /*
155: * Create a new page and append it to the pages table.
156: */
157: struct dba_array *
1.8 schwarze 158: dba_page_new(struct dba_array *pages, const char *arch,
159: const char *desc, const char *file, enum form form)
1.1 schwarze 160: {
161: struct dba_array *page, *entry;
162:
163: page = dba_array_new(DBP_MAX, 0);
164: entry = dba_array_new(1, DBA_STR | DBA_GROW);
165: dba_array_add(page, entry);
166: entry = dba_array_new(1, DBA_STR | DBA_GROW);
167: dba_array_add(page, entry);
168: if (arch != NULL && *arch != '\0') {
169: entry = dba_array_new(1, DBA_STR | DBA_GROW);
170: dba_array_add(entry, (void *)arch);
171: } else
172: entry = NULL;
173: dba_array_add(page, entry);
174: dba_array_add(page, mandoc_strdup(desc));
175: entry = dba_array_new(1, DBA_STR | DBA_GROW);
176: dba_array_add(entry, prepend(file, form));
177: dba_array_add(page, entry);
178: dba_array_add(pages, page);
179: return page;
180: }
181:
182: /*
183: * Add a section, architecture, or file name to an existing page.
184: * Passing the NULL pointer for the architecture makes the page MI.
185: * In that case, any earlier or later architectures are ignored.
186: */
187: void
188: dba_page_add(struct dba_array *page, int32_t ie, const char *str)
189: {
190: struct dba_array *entries;
191: char *entry;
192:
193: entries = dba_array_get(page, ie);
194: if (ie == DBP_ARCH) {
195: if (entries == NULL)
196: return;
1.6 schwarze 197: if (str == NULL || *str == '\0') {
1.1 schwarze 198: dba_array_free(entries);
199: dba_array_set(page, DBP_ARCH, NULL);
200: return;
201: }
202: }
203: if (*str == '\0')
204: return;
1.2 schwarze 205: dba_array_FOREACH(entries, entry) {
206: if (ie == DBP_FILE && *entry < ' ')
207: entry++;
1.1 schwarze 208: if (strcmp(entry, str) == 0)
209: return;
1.2 schwarze 210: }
1.1 schwarze 211: dba_array_add(entries, (void *)str);
212: }
213:
214: /*
215: * Add an additional name to an existing page.
216: */
217: void
218: dba_page_alias(struct dba_array *page, const char *name, uint64_t mask)
219: {
220: struct dba_array *entries;
221: char *entry;
222: char maskbyte;
223:
224: if (*name == '\0')
225: return;
226: maskbyte = mask & NAME_MASK;
227: entries = dba_array_get(page, DBP_NAME);
228: dba_array_FOREACH(entries, entry) {
229: if (strcmp(entry + 1, name) == 0) {
230: *entry |= maskbyte;
231: return;
232: }
233: }
234: dba_array_add(entries, prepend(name, maskbyte));
235: }
236:
/*
 * Return a pointer to a temporary copy of instr with inbyte prepended.
 * The copy lives in a static buffer that is reused and possibly
 * reallocated by the next call, so the result is only valid until then.
 */
static void *
prepend(const char *instr, char inbyte)
{
	static char	*buf = NULL;
	static size_t	 bufsz = 0;
	size_t		 need;

	/* Size of the string including its terminating NUL byte. */
	need = strlen(instr) + 1;
	if (bufsz < need) {
		/* One additional byte for the prefix. */
		buf = mandoc_realloc(buf, need + 1);
		bufsz = need;
	}
	buf[0] = inbyte;
	memcpy(buf + 1, instr, need);
	return buf;
}
256:
257: /*
258: * Write the pages table to disk; the format is:
259: * - One integer containing the number of pages.
260: * - For each page, five pointers to the names, sections,
261: * architectures, description, and file names of the page.
262: * MI pages write 0 instead of the architecture pointer.
263: * - One list each for names, sections, architectures, descriptions and
264: * file names. The description for each page ends with a NUL byte.
265: * For all the other lists, each string ends with a NUL byte,
266: * and the last string for a page ends with two NUL bytes.
267: * - To assure alignment of following integers,
268: * the end is padded with NUL bytes up to a multiple of four bytes.
269: */
270: static void
271: dba_pages_write(struct dba_array *pages)
272: {
1.3 schwarze 273: struct dba_array *page, *entry;
1.1 schwarze 274: int32_t pos_pages, pos_end;
275:
276: pos_pages = dba_array_writelen(pages, 5);
277: dba_array_FOREACH(pages, page) {
278: dba_array_setpos(page, DBP_NAME, dba_tell());
1.3 schwarze 279: entry = dba_array_get(page, DBP_NAME);
280: dba_array_sort(entry, compare_names);
281: dba_array_writelst(entry);
1.1 schwarze 282: }
283: dba_array_FOREACH(pages, page) {
284: dba_array_setpos(page, DBP_SECT, dba_tell());
1.3 schwarze 285: entry = dba_array_get(page, DBP_SECT);
286: dba_array_sort(entry, compare_strings);
287: dba_array_writelst(entry);
1.1 schwarze 288: }
289: dba_array_FOREACH(pages, page) {
290: if ((entry = dba_array_get(page, DBP_ARCH)) != NULL) {
291: dba_array_setpos(page, DBP_ARCH, dba_tell());
1.3 schwarze 292: dba_array_sort(entry, compare_strings);
1.1 schwarze 293: dba_array_writelst(entry);
294: } else
295: dba_array_setpos(page, DBP_ARCH, 0);
296: }
297: dba_array_FOREACH(pages, page) {
298: dba_array_setpos(page, DBP_DESC, dba_tell());
299: dba_str_write(dba_array_get(page, DBP_DESC));
300: }
301: dba_array_FOREACH(pages, page) {
302: dba_array_setpos(page, DBP_FILE, dba_tell());
303: dba_array_writelst(dba_array_get(page, DBP_FILE));
304: }
305: pos_end = dba_align();
306: dba_seek(pos_pages);
307: dba_array_FOREACH(pages, page)
308: dba_array_writepos(page);
309: dba_seek(pos_end);
310: }
311:
/*
 * Comparison function for sorting the names of a page.
 * Both arguments point to pointers to name entries consisting of
 * one leading byte followed by the name itself.  Entries with a
 * larger leading byte sort first; among entries with equal leading
 * bytes, the names are ordered ignoring case.
 */
static int
compare_names(const void *vp1, const void *vp2)
{
	const char	*lhs, *rhs;
	int		 bytediff;

	lhs = *(const char *const *)vp1;
	rhs = *(const char *const *)vp2;

	/* Reversed operands: descending order of the leading byte. */
	bytediff = *rhs - *lhs;
	if (bytediff != 0)
		return bytediff;
	return strcasecmp(lhs + 1, rhs + 1);
}
323:
/*
 * Comparison function for sorting lists of plain strings.
 * Both arguments point to pointers to NUL-terminated strings,
 * which are compared with strcmp(3).
 */
static int
compare_strings(const void *vp1, const void *vp2)
{
	const char *const	*lhs = vp1;
	const char *const	*rhs = vp2;

	return strcmp(*lhs, *rhs);
}
1.1 schwarze 333:
334: /*** functions for handling macros ************************************/
335:
336: /*
1.9 ! schwarze 337: * In the hash table for a single macro, look up an entry by
! 338: * the macro value or add an empty one if it doesn't exist yet.
! 339: */
! 340: static struct macro_entry *
! 341: get_macro_entry(struct ohash *macro, const char *value, int32_t np)
! 342: {
! 343: struct macro_entry *entry;
! 344: size_t len;
! 345: unsigned int slot;
! 346:
! 347: slot = ohash_qlookup(macro, value);
! 348: if ((entry = ohash_find(macro, slot)) == NULL) {
! 349: len = strlen(value) + 1;
! 350: entry = mandoc_malloc(sizeof(*entry) + len);
! 351: memcpy(&entry->value, value, len);
! 352: entry->pages = dba_array_new(np, DBA_GROW);
! 353: ohash_insert(macro, slot, entry);
! 354: }
! 355: return entry;
! 356: }
! 357:
! 358: /*
! 359: * In addition to get_macro_entry(), add multiple page references,
! 360: * converting them from the on-disk format (byte offsets in the file)
! 361: * to page pointers in memory.
1.1 schwarze 362: */
363: void
364: dba_macro_new(struct dba *dba, int32_t im, const char *value,
365: const int32_t *pp)
366: {
1.9 ! schwarze 367: struct macro_entry *entry;
1.1 schwarze 368: const int32_t *ip;
369: int32_t np;
370:
371: np = 0;
372: for (ip = pp; *ip; ip++)
373: np++;
1.9 ! schwarze 374:
! 375: entry = get_macro_entry(dba_array_get(dba->macros, im), value, np);
1.1 schwarze 376: for (ip = pp; *ip; ip++)
1.9 ! schwarze 377: dba_array_add(entry->pages, dba_array_get(dba->pages,
1.1 schwarze 378: be32toh(*ip) / 5 / sizeof(*ip) - 1));
379: }
380:
381: /*
1.9 ! schwarze 382: * In addition to get_macro_entry(), add one page reference,
! 383: * directly taking the in-memory page pointer as an argument.
1.1 schwarze 384: */
385: void
386: dba_macro_add(struct dba_array *macros, int32_t im, const char *value,
387: struct dba_array *page)
388: {
1.9 ! schwarze 389: struct macro_entry *entry;
1.1 schwarze 390:
391: if (*value == '\0')
392: return;
1.9 ! schwarze 393: entry = get_macro_entry(dba_array_get(macros, im), value, 1);
! 394: dba_array_add(entry->pages, page);
1.1 schwarze 395: }
396:
397: /*
398: * Write the macros table to disk; the format is:
399: * - The number of macro tables (actually, MACRO_MAX).
400: * - That number of pointers to the individual macro tables.
401: * - The individual macro tables.
402: */
403: static void
404: dba_macros_write(struct dba_array *macros)
405: {
1.9 ! schwarze 406: struct ohash *macro;
1.1 schwarze 407: int32_t im, pos_macros, pos_end;
408:
409: pos_macros = dba_array_writelen(macros, 1);
410: im = 0;
411: dba_array_FOREACH(macros, macro) {
412: dba_array_setpos(macros, im++, dba_tell());
413: dba_macro_write(macro);
414: }
415: pos_end = dba_tell();
416: dba_seek(pos_macros);
417: dba_array_writepos(macros);
418: dba_seek(pos_end);
419: }
420:
421: /*
422: * Write one individual macro table to disk; the format is:
423: * - The number of entries in the table.
424: * - For each entry, two pointers, the first one to the value
425: * and the second one to the list of pages.
426: * - A list of values, each ending in a NUL byte.
427: * - To assure alignment of following integers,
428: * padding with NUL bytes up to a multiple of four bytes.
429: * - A list of pointers to pages, each list ending in a 0 integer.
430: */
431: static void
1.9 ! schwarze 432: dba_macro_write(struct ohash *macro)
1.1 schwarze 433: {
1.9 ! schwarze 434: struct macro_entry **entries, *entry;
! 435: struct dba_array *page;
! 436: int32_t *kpos, *dpos;
! 437: unsigned int ie, ne, slot;
! 438: int use;
! 439: int32_t addr, pos_macro, pos_end;
! 440:
! 441: /* Temporary storage for filtering and sorting. */
! 442:
! 443: ne = ohash_entries(macro);
! 444: entries = mandoc_reallocarray(NULL, ne, sizeof(*entries));
! 445: kpos = mandoc_reallocarray(NULL, ne, sizeof(*kpos));
! 446: dpos = mandoc_reallocarray(NULL, ne, sizeof(*dpos));
! 447:
! 448: /* Build a list of non-empty entries and sort it. */
! 449:
! 450: ne = 0;
! 451: for (entry = ohash_first(macro, &slot); entry != NULL;
! 452: entry = ohash_next(macro, &slot)) {
! 453: use = 0;
! 454: dba_array_FOREACH(entry->pages, page)
1.1 schwarze 455: if (dba_array_getpos(page))
1.9 ! schwarze 456: use = 1;
! 457: if (use)
! 458: entries[ne++] = entry;
! 459: }
! 460: qsort(entries, ne, sizeof(*entries), compare_entries);
! 461:
! 462: /* Number of entries, and space for the pointer pairs. */
! 463:
! 464: dba_int_write(ne);
! 465: pos_macro = dba_skip(2, ne);
! 466:
! 467: /* String table. */
! 468:
! 469: for (ie = 0; ie < ne; ie++) {
! 470: kpos[ie] = dba_tell();
! 471: dba_str_write(entries[ie]->value);
1.1 schwarze 472: }
473: dba_align();
1.9 ! schwarze 474:
! 475: /* Pages table. */
! 476:
! 477: for (ie = 0; ie < ne; ie++) {
! 478: dpos[ie] = dba_tell();
! 479: dba_array_FOREACH(entries[ie]->pages, page)
1.1 schwarze 480: if ((addr = dba_array_getpos(page)))
481: dba_int_write(addr);
482: dba_int_write(0);
483: }
484: pos_end = dba_tell();
1.9 ! schwarze 485:
! 486: /* Fill in the pointer pairs. */
! 487:
1.1 schwarze 488: dba_seek(pos_macro);
1.9 ! schwarze 489: for (ie = 0; ie < ne; ie++) {
! 490: dba_int_write(kpos[ie]);
! 491: dba_int_write(dpos[ie]);
! 492: }
1.1 schwarze 493: dba_seek(pos_end);
1.9 ! schwarze 494:
! 495: free(entries);
! 496: free(kpos);
! 497: free(dpos);
! 498: }
! 499:
! 500: static int
! 501: compare_entries(const void *vp1, const void *vp2)
! 502: {
! 503: const struct macro_entry *ep1, *ep2;
! 504:
! 505: ep1 = *(struct macro_entry **)vp1;
! 506: ep2 = *(struct macro_entry **)vp2;
! 507: return strcmp(ep1->value, ep2->value);
1.1 schwarze 508: }
CVSweb