Annotation of mandoc/read.c, Revision 1.204
1.204 ! schwarze 1: /* $Id: read.c,v 1.203 2018/12/14 01:18:26 schwarze Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.196 schwarze 4: * Copyright (c) 2010-2018 Ingo Schwarze <schwarze@openbsd.org>
1.42 schwarze 5: * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
1.1 kristaps 6: *
7: * Permission to use, copy, modify, and distribute this software for any
8: * purpose with or without fee is hereby granted, provided that the above
9: * copyright notice and this permission notice appear in all copies.
10: *
1.133 schwarze 11: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.1 kristaps 12: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.133 schwarze 13: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.1 kristaps 14: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18: */
1.11 kristaps 19: #include "config.h"
20:
1.80 schwarze 21: #include <sys/types.h>
1.82 schwarze 22: #include <sys/mman.h>
1.80 schwarze 23: #include <sys/stat.h>
1.1 kristaps 24:
25: #include <assert.h>
26: #include <ctype.h>
1.40 schwarze 27: #include <errno.h>
1.1 kristaps 28: #include <fcntl.h>
1.3 kristaps 29: #include <stdarg.h>
1.1 kristaps 30: #include <stdio.h>
31: #include <stdlib.h>
32: #include <string.h>
33: #include <unistd.h>
1.140 schwarze 34: #include <zlib.h>
1.1 kristaps 35:
1.133 schwarze 36: #include "mandoc_aux.h"
1.1 kristaps 37: #include "mandoc.h"
1.133 schwarze 38: #include "roff.h"
1.1 kristaps 39: #include "mdoc.h"
40: #include "man.h"
1.202 schwarze 41: #include "mandoc_parse.h"
1.133 schwarze 42: #include "libmandoc.h"
1.201 schwarze 43: #include "roff_int.h"
1.1 kristaps 44:
45: #define REPARSE_LIMIT 1000
46:
47: struct mparse {
1.160 schwarze 48: struct roff *roff; /* roff parser (!NULL) */
1.134 schwarze 49: struct roff_man *man; /* man parser */
1.45 schwarze 50: char *sodest; /* filename pointed to by .so */
1.83 schwarze 51: struct buf *primary; /* buffer currently being parsed */
1.198 schwarze 52: struct buf *secondary; /* copy of top level input */
1.199 schwarze 53: struct buf *loop; /* open .while request line */
1.179 schwarze 54: const char *os_s; /* default operating system */
1.83 schwarze 55: int options; /* parser options */
1.140 schwarze 56: int gzip; /* current input file is gzipped */
1.93 schwarze 57: int filenc; /* encoding of the current file */
1.1 kristaps 58: int reparse_count; /* finite interp. stack */
1.83 schwarze 59: int line; /* line number in the file */
1.1 kristaps 60: };
61:
/* File-local helpers, defined further down in this file. */
static	void	  choose_parser(struct mparse *curp);
static	void	  free_buf_list(struct buf *buf);
static	void	  resize_buf(struct buf *buf, size_t initial);
static	int	  mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	int	  read_whole_file(struct mparse *curp, int fd,
			struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
1.1 kristaps 70:
1.47 schwarze 71:
1.1 kristaps 72: static void
73: resize_buf(struct buf *buf, size_t initial)
74: {
75:
76: buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
77: buf->buf = mandoc_realloc(buf->buf, buf->sz);
78: }
79:
80: static void
1.198 schwarze 81: free_buf_list(struct buf *buf)
82: {
83: struct buf *tmp;
84:
85: while (buf != NULL) {
86: tmp = buf;
87: buf = tmp->next;
88: free(tmp->buf);
89: free(tmp);
90: }
91: }
92:
93: static void
1.84 schwarze 94: choose_parser(struct mparse *curp)
1.1 kristaps 95: {
1.83 schwarze 96: char *cp, *ep;
97: int format;
1.1 kristaps 98:
1.83 schwarze 99: /*
100: * If neither command line arguments -mdoc or -man select
101: * a parser nor the roff parser found a .Dd or .TH macro
102: * yet, look ahead in the main input buffer.
103: */
104:
105: if ((format = roff_getformat(curp->roff)) == 0) {
106: cp = curp->primary->buf;
107: ep = cp + curp->primary->sz;
108: while (cp < ep) {
1.85 schwarze 109: if (*cp == '.' || *cp == '\'') {
1.83 schwarze 110: cp++;
111: if (cp[0] == 'D' && cp[1] == 'd') {
112: format = MPARSE_MDOC;
113: break;
114: }
115: if (cp[0] == 'T' && cp[1] == 'H') {
116: format = MPARSE_MAN;
117: break;
118: }
119: }
120: cp = memchr(cp, '\n', ep - cp);
121: if (cp == NULL)
122: break;
123: cp++;
124: }
1.1 kristaps 125: }
126:
1.83 schwarze 127: if (format == MPARSE_MDOC) {
1.137 schwarze 128: curp->man->macroset = MACROSET_MDOC;
1.164 schwarze 129: if (curp->man->mdocmac == NULL)
130: curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
1.137 schwarze 131: } else {
132: curp->man->macroset = MACROSET_MAN;
1.164 schwarze 133: if (curp->man->manmac == NULL)
134: curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
1.47 schwarze 135: }
1.164 schwarze 136: curp->man->first->tok = TOKEN_NONE;
1.1 kristaps 137: }
138:
139: /*
1.95 schwarze 140: * Main parse routine for a buffer.
141: * It assumes encoding and line numbering are already set up.
142: * It can recurse directly (for invocations of user-defined
143: * macros, inline equations, and input line traps)
144: * and indirectly (for .so file inclusion).
1.1 kristaps 145: */
1.199 schwarze 146: static int
1.95 schwarze 147: mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
1.1 kristaps 148: {
149: struct buf ln;
1.199 schwarze 150: struct buf *firstln, *lastln, *thisln, *loop;
1.110 schwarze 151: char *cp;
1.95 schwarze 152: size_t pos; /* byte number in the ln buffer */
1.199 schwarze 153: int line_result, result;
1.100 schwarze 154: int of;
1.1 kristaps 155: int lnn; /* line number in the real file */
1.110 schwarze 156: int fd;
1.199 schwarze 157: int inloop; /* Saw .while on this level. */
1.1 kristaps 158: unsigned char c;
159:
1.198 schwarze 160: ln.sz = 256;
161: ln.buf = mandoc_malloc(ln.sz);
162: ln.next = NULL;
1.199 schwarze 163: firstln = loop = NULL;
1.47 schwarze 164: lnn = curp->line;
165: pos = 0;
1.199 schwarze 166: inloop = 0;
1.198 schwarze 167: result = ROFF_CONT;
1.1 kristaps 168:
1.199 schwarze 169: while (i < blk.sz && (blk.buf[i] != '\0' || pos != 0)) {
1.1 kristaps 170: if (start) {
171: curp->line = lnn;
172: curp->reparse_count = 0;
1.93 schwarze 173:
174: if (lnn < 3 &&
175: curp->filenc & MPARSE_UTF8 &&
1.95 schwarze 176: curp->filenc & MPARSE_LATIN1)
177: curp->filenc = preconv_cue(&blk, i);
1.1 kristaps 178: }
179:
1.95 schwarze 180: while (i < blk.sz && (start || blk.buf[i] != '\0')) {
1.1 kristaps 181:
182: /*
183: * When finding an unescaped newline character,
184: * leave the character loop to process the line.
185: * Skip a preceding carriage return, if any.
186: */
187:
1.95 schwarze 188: if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
1.1 kristaps 189: '\n' == blk.buf[i + 1])
190: ++i;
191: if ('\n' == blk.buf[i]) {
192: ++i;
193: ++lnn;
194: break;
195: }
196:
1.35 schwarze 197: /*
1.93 schwarze 198: * Make sure we have space for the worst
1.198 schwarze 199: * case of 12 bytes: "\\[u10ffff]\n\0"
1.35 schwarze 200: */
201:
1.198 schwarze 202: if (pos + 12 > ln.sz)
1.35 schwarze 203: resize_buf(&ln, 256);
204:
1.47 schwarze 205: /*
1.93 schwarze 206: * Encode 8-bit input.
1.1 kristaps 207: */
208:
1.93 schwarze 209: c = blk.buf[i];
210: if (c & 0x80) {
1.95 schwarze 211: if ( ! (curp->filenc && preconv_encode(
212: &blk, &i, &ln, &pos, &curp->filenc))) {
1.114 schwarze 213: mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
214: curp->line, pos, "0x%x", c);
1.93 schwarze 215: ln.buf[pos++] = '?';
216: i++;
217: }
218: continue;
219: }
220:
221: /*
222: * Exclude control characters.
223: */
1.1 kristaps 224:
1.93 schwarze 225: if (c == 0x7f || (c < 0x20 && c != 0x09)) {
1.114 schwarze 226: mandoc_vmsg(c == 0x00 || c == 0x04 ||
227: c > 0x0a ? MANDOCERR_CHAR_BAD :
228: MANDOCERR_CHAR_UNSUPP,
229: curp, curp->line, pos, "0x%x", c);
1.1 kristaps 230: i++;
1.127 schwarze 231: if (c != '\r')
232: ln.buf[pos++] = '?';
1.1 kristaps 233: continue;
234: }
235:
236: ln.buf[pos++] = blk.buf[i++];
237: }
1.198 schwarze 238: ln.buf[pos] = '\0';
239:
240: /*
241: * Maintain a lookaside buffer of all lines.
242: * parsed from this input source.
243: */
244:
245: thisln = mandoc_malloc(sizeof(*thisln));
246: thisln->buf = mandoc_strdup(ln.buf);
247: thisln->sz = strlen(ln.buf) + 1;
248: thisln->next = NULL;
249: if (firstln == NULL) {
250: firstln = lastln = thisln;
251: if (curp->secondary == NULL)
252: curp->secondary = firstln;
253: } else {
254: lastln->next = thisln;
255: lastln = thisln;
256: }
1.1 kristaps 257:
1.198 schwarze 258: /* XXX Ugly hack to mark the end of the input. */
1.1 kristaps 259:
1.198 schwarze 260: if (i == blk.sz || blk.buf[i] == '\0') {
1.170 schwarze 261: ln.buf[pos++] = '\n';
1.198 schwarze 262: ln.buf[pos] = '\0';
263: }
1.1 kristaps 264:
265: /*
266: * A significant amount of complexity is contained by
267: * the roff preprocessor. It's line-oriented but can be
268: * expressed on one line, so we need at times to
269: * readjust our starting point and re-run it. The roff
270: * preprocessor can also readjust the buffers with new
271: * data, so we pass them in wholesale.
272: */
273:
274: of = 0;
275: rerun:
1.197 schwarze 276: line_result = roff_parseln(curp->roff, curp->line, &ln, &of);
1.1 kristaps 277:
1.199 schwarze 278: /* Process options. */
279:
280: if (line_result & ROFF_APPEND)
281: assert(line_result == (ROFF_IGN | ROFF_APPEND));
282:
283: if (line_result & ROFF_USERCALL)
284: assert((line_result & ROFF_MASK) == ROFF_REPARSE);
285:
286: if (line_result & ROFF_USERRET) {
287: assert(line_result == (ROFF_IGN | ROFF_USERRET));
288: if (start == 0) {
289: /* Return from the current macro. */
290: result = ROFF_USERRET;
291: goto out;
292: }
293: }
294:
295: switch (line_result & ROFF_LOOPMASK) {
296: case ROFF_IGN:
297: break;
298: case ROFF_WHILE:
299: if (curp->loop != NULL) {
300: if (loop == curp->loop)
301: break;
302: mandoc_msg(MANDOCERR_WHILE_NEST,
303: curp, curp->line, pos, NULL);
304: }
305: curp->loop = thisln;
306: loop = NULL;
307: inloop = 1;
308: break;
309: case ROFF_LOOPCONT:
310: case ROFF_LOOPEXIT:
311: if (curp->loop == NULL) {
312: mandoc_msg(MANDOCERR_WHILE_FAIL,
313: curp, curp->line, pos, NULL);
314: break;
315: }
316: if (inloop == 0) {
317: mandoc_msg(MANDOCERR_WHILE_INTO,
318: curp, curp->line, pos, NULL);
319: curp->loop = loop = NULL;
320: break;
321: }
322: if (line_result & ROFF_LOOPCONT)
323: loop = curp->loop;
324: else {
325: curp->loop = loop = NULL;
326: inloop = 0;
327: }
328: break;
329: default:
330: abort();
331: }
332:
333: /* Process the main instruction from the roff parser. */
334:
335: switch (line_result & ROFF_MASK) {
336: case ROFF_IGN:
337: break;
338: case ROFF_CONT:
339: if (curp->man->macroset == MACROSET_NONE)
340: choose_parser(curp);
341: if ((curp->man->macroset == MACROSET_MDOC ?
342: mdoc_parseln(curp->man, curp->line, ln.buf, of) :
343: man_parseln(curp->man, curp->line, ln.buf, of)
344: ) == 2)
345: goto out;
346: break;
347: case ROFF_RERUN:
348: goto rerun;
1.47 schwarze 349: case ROFF_REPARSE:
1.197 schwarze 350: if (++curp->reparse_count > REPARSE_LIMIT) {
1.199 schwarze 351: /* Abort and return to the top level. */
1.198 schwarze 352: result = ROFF_IGN;
1.3 kristaps 353: mandoc_msg(MANDOCERR_ROFFLOOP, curp,
1.47 schwarze 354: curp->line, pos, NULL);
1.199 schwarze 355: goto out;
1.197 schwarze 356: }
1.199 schwarze 357: result = mparse_buf_r(curp, ln, of, 0);
358: if (line_result & ROFF_USERCALL) {
359: roff_userret(curp->roff);
360: /* Continue normally. */
361: if (result & ROFF_USERRET)
362: result = ROFF_CONT;
1.163 schwarze 363: }
1.199 schwarze 364: if (start == 0 && result != ROFF_CONT)
365: goto out;
366: break;
1.47 schwarze 367: case ROFF_SO:
1.95 schwarze 368: if ( ! (curp->options & MPARSE_SO) &&
369: (i >= blk.sz || blk.buf[i] == '\0')) {
1.45 schwarze 370: curp->sodest = mandoc_strdup(ln.buf + of);
1.198 schwarze 371: goto out;
1.45 schwarze 372: }
1.148 schwarze 373: if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
1.110 schwarze 374: mparse_readfd(curp, fd, ln.buf + of);
1.147 schwarze 375: close(fd);
1.113 schwarze 376: } else {
1.52 schwarze 377: mandoc_vmsg(MANDOCERR_SO_FAIL,
378: curp, curp->line, pos,
379: ".so %s", ln.buf + of);
1.110 schwarze 380: ln.sz = mandoc_asprintf(&cp,
381: ".sp\nSee the file %s.\n.sp",
382: ln.buf + of);
383: free(ln.buf);
384: ln.buf = cp;
385: of = 0;
386: mparse_buf_r(curp, ln, of, 0);
1.52 schwarze 387: }
1.199 schwarze 388: break;
1.1 kristaps 389: default:
1.199 schwarze 390: abort();
1.1 kristaps 391: }
392:
1.199 schwarze 393: /* Start the next input line. */
1.1 kristaps 394:
1.199 schwarze 395: if (loop != NULL &&
396: (line_result & ROFF_LOOPMASK) == ROFF_IGN)
397: loop = loop->next;
398:
399: if (loop != NULL) {
400: if ((line_result & ROFF_APPEND) == 0)
401: *ln.buf = '\0';
402: if (ln.sz < loop->sz)
403: resize_buf(&ln, loop->sz);
404: (void)strlcat(ln.buf, loop->buf, ln.sz);
405: of = 0;
406: goto rerun;
407: }
1.1 kristaps 408:
1.199 schwarze 409: pos = (line_result & ROFF_APPEND) ? strlen(ln.buf) : 0;
1.1 kristaps 410: }
1.198 schwarze 411: out:
1.199 schwarze 412: if (inloop) {
413: if (result != ROFF_USERRET)
414: mandoc_msg(MANDOCERR_WHILE_OUTOF, curp,
415: curp->line, pos, NULL);
416: curp->loop = NULL;
417: }
1.1 kristaps 418: free(ln.buf);
1.198 schwarze 419: if (firstln != curp->secondary)
420: free_buf_list(firstln);
421: return result;
1.1 kristaps 422: }
423:
424: static int
1.203 schwarze 425: read_whole_file(struct mparse *curp, int fd, struct buf *fb, int *with_mmap)
1.1 kristaps 426: {
1.161 schwarze 427: struct stat st;
1.140 schwarze 428: gzFile gz;
1.1 kristaps 429: size_t off;
430: ssize_t ssz;
1.194 schwarze 431: int gzerrnum, retval;
1.143 schwarze 432:
1.192 schwarze 433: if (fstat(fd, &st) == -1) {
434: mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
435: "fstat: %s", strerror(errno));
436: return 0;
437: }
1.1 kristaps 438:
439: /*
440: * If we're a regular file, try just reading in the whole entry
441: * via mmap(). This is faster than reading it into blocks, and
442: * since each file is only a few bytes to begin with, I'm not
443: * concerned that this is going to tank any machines.
444: */
445:
1.140 schwarze 446: if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
1.131 schwarze 447: if (st.st_size > 0x7fffffff) {
1.111 schwarze 448: mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
1.142 schwarze 449: return 0;
1.1 kristaps 450: }
451: *with_mmap = 1;
452: fb->sz = (size_t)st.st_size;
1.37 schwarze 453: fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
1.1 kristaps 454: if (fb->buf != MAP_FAILED)
1.142 schwarze 455: return 1;
1.1 kristaps 456: }
457:
1.140 schwarze 458: if (curp->gzip) {
1.194 schwarze 459: /*
460: * Duplicating the file descriptor is required
461: * because we will have to call gzclose(3)
462: * to free memory used internally by zlib,
463: * but that will also close the file descriptor,
464: * which this function must not do.
465: */
466: if ((fd = dup(fd)) == -1) {
467: mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
468: "dup: %s", strerror(errno));
469: return 0;
470: }
1.192 schwarze 471: if ((gz = gzdopen(fd, "rb")) == NULL) {
472: mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
473: "gzdopen: %s", strerror(errno));
1.194 schwarze 474: close(fd);
1.192 schwarze 475: return 0;
476: }
1.140 schwarze 477: } else
478: gz = NULL;
479:
1.1 kristaps 480: /*
481: * If this isn't a regular file (like, say, stdin), then we must
482: * go the old way and just read things in bit by bit.
483: */
484:
485: *with_mmap = 0;
486: off = 0;
1.194 schwarze 487: retval = 0;
1.1 kristaps 488: fb->sz = 0;
489: fb->buf = NULL;
490: for (;;) {
491: if (off == fb->sz) {
492: if (fb->sz == (1U << 31)) {
1.111 schwarze 493: mandoc_msg(MANDOCERR_TOOLARGE, curp,
494: 0, 0, NULL);
1.1 kristaps 495: break;
496: }
497: resize_buf(fb, 65536);
498: }
1.140 schwarze 499: ssz = curp->gzip ?
500: gzread(gz, fb->buf + (int)off, fb->sz - off) :
501: read(fd, fb->buf + (int)off, fb->sz - off);
1.1 kristaps 502: if (ssz == 0) {
503: fb->sz = off;
1.194 schwarze 504: retval = 1;
505: break;
1.1 kristaps 506: }
1.192 schwarze 507: if (ssz == -1) {
1.194 schwarze 508: if (curp->gzip)
509: (void)gzerror(gz, &gzerrnum);
510: mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "read: %s",
511: curp->gzip && gzerrnum != Z_ERRNO ?
512: zError(gzerrnum) : strerror(errno));
1.192 schwarze 513: break;
514: }
1.1 kristaps 515: off += (size_t)ssz;
516: }
517:
1.194 schwarze 518: if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK)
519: mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "gzclose: %s",
520: gzerrnum == Z_ERRNO ? strerror(errno) :
521: zError(gzerrnum));
522: if (retval == 0) {
523: free(fb->buf);
524: fb->buf = NULL;
525: }
526: return retval;
1.1 kristaps 527: }
528:
529: static void
530: mparse_end(struct mparse *curp)
531: {
1.135 schwarze 532: if (curp->man->macroset == MACROSET_NONE)
533: curp->man->macroset = MACROSET_MAN;
534: if (curp->man->macroset == MACROSET_MDOC)
535: mdoc_endparse(curp->man);
536: else
1.111 schwarze 537: man_endparse(curp->man);
1.1 kristaps 538: roff_endparse(curp->roff);
539: }
540:
541: static void
1.36 schwarze 542: mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
1.28 joerg 543: {
1.85 schwarze 544: struct buf *svprimary;
1.28 joerg 545: const char *svfile;
1.95 schwarze 546: size_t offset;
1.36 schwarze 547: static int recursion_depth;
548:
549: if (64 < recursion_depth) {
550: mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
551: return;
552: }
1.28 joerg 553:
554: /* Line number is per-file. */
1.203 schwarze 555: svfile = mandoc_msg_getinfilename();
556: mandoc_msg_setinfilename(file);
1.85 schwarze 557: svprimary = curp->primary;
1.83 schwarze 558: curp->primary = &blk;
1.28 joerg 559: curp->line = 1;
1.36 schwarze 560: recursion_depth++;
1.28 joerg 561:
1.93 schwarze 562: /* Skip an UTF-8 byte order mark. */
563: if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
564: (unsigned char)blk.buf[0] == 0xef &&
565: (unsigned char)blk.buf[1] == 0xbb &&
566: (unsigned char)blk.buf[2] == 0xbf) {
1.95 schwarze 567: offset = 3;
1.93 schwarze 568: curp->filenc &= ~MPARSE_LATIN1;
1.95 schwarze 569: } else
570: offset = 0;
1.93 schwarze 571:
1.95 schwarze 572: mparse_buf_r(curp, blk, offset, 1);
1.28 joerg 573:
1.111 schwarze 574: if (--recursion_depth == 0)
1.28 joerg 575: mparse_end(curp);
576:
1.85 schwarze 577: curp->primary = svprimary;
1.203 schwarze 578: if (svfile != NULL)
579: mandoc_msg_setinfilename(svfile);
1.28 joerg 580: }
581:
1.98 schwarze 582: /*
583: * Read the whole file into memory and call the parsers.
584: * Called recursively when an .so request is encountered.
585: */
1.203 schwarze 586: void
1.36 schwarze 587: mparse_readfd(struct mparse *curp, int fd, const char *file)
1.1 kristaps 588: {
1.28 joerg 589: struct buf blk;
590: int with_mmap;
1.93 schwarze 591: int save_filenc;
1.1 kristaps 592:
1.203 schwarze 593: if (read_whole_file(curp, fd, &blk, &with_mmap)) {
1.93 schwarze 594: save_filenc = curp->filenc;
595: curp->filenc = curp->options &
596: (MPARSE_UTF8 | MPARSE_LATIN1);
1.91 schwarze 597: mparse_parse_buffer(curp, blk, file);
1.93 schwarze 598: curp->filenc = save_filenc;
1.91 schwarze 599: if (with_mmap)
600: munmap(blk.buf, blk.sz);
601: else
602: free(blk.buf);
603: }
1.82 schwarze 604: }
605:
1.148 schwarze 606: int
607: mparse_open(struct mparse *curp, const char *file)
1.82 schwarze 608: {
609: char *cp;
1.148 schwarze 610: int fd;
1.82 schwarze 611:
1.140 schwarze 612: cp = strrchr(file, '.');
613: curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
1.98 schwarze 614:
1.140 schwarze 615: /* First try to use the filename as it is. */
1.98 schwarze 616:
1.148 schwarze 617: if ((fd = open(file, O_RDONLY)) != -1)
618: return fd;
1.98 schwarze 619:
1.140 schwarze 620: /*
621: * If that doesn't work and the filename doesn't
622: * already end in .gz, try appending .gz.
623: */
1.98 schwarze 624:
1.140 schwarze 625: if ( ! curp->gzip) {
1.98 schwarze 626: mandoc_asprintf(&cp, "%s.gz", file);
1.149 schwarze 627: fd = open(cp, O_RDONLY);
1.108 schwarze 628: free(cp);
1.148 schwarze 629: if (fd != -1) {
1.140 schwarze 630: curp->gzip = 1;
1.148 schwarze 631: return fd;
1.82 schwarze 632: }
633: }
634:
1.140 schwarze 635: /* Neither worked, give up. */
1.97 schwarze 636:
1.140 schwarze 637: mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
1.148 schwarze 638: return -1;
1.1 kristaps 639: }
640:
641: struct mparse *
1.203 schwarze 642: mparse_alloc(int options, enum mandoc_os os_e, const char *os_s)
1.1 kristaps 643: {
644: struct mparse *curp;
1.10 kristaps 645:
1.1 kristaps 646: curp = mandoc_calloc(1, sizeof(struct mparse));
647:
1.44 schwarze 648: curp->options = options;
1.179 schwarze 649: curp->os_s = os_s;
1.1 kristaps 650:
1.144 schwarze 651: curp->roff = roff_alloc(curp, options);
1.179 schwarze 652: curp->man = roff_man_alloc(curp->roff, curp, curp->os_s,
1.137 schwarze 653: curp->options & MPARSE_QUICK ? 1 : 0);
1.136 schwarze 654: if (curp->options & MPARSE_MDOC) {
1.137 schwarze 655: curp->man->macroset = MACROSET_MDOC;
1.164 schwarze 656: if (curp->man->mdocmac == NULL)
657: curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
1.137 schwarze 658: } else if (curp->options & MPARSE_MAN) {
659: curp->man->macroset = MACROSET_MAN;
1.164 schwarze 660: if (curp->man->manmac == NULL)
661: curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
1.136 schwarze 662: }
1.138 schwarze 663: curp->man->first->tok = TOKEN_NONE;
1.179 schwarze 664: curp->man->meta.os_e = os_e;
1.142 schwarze 665: return curp;
1.1 kristaps 666: }
667:
668: void
669: mparse_reset(struct mparse *curp)
670: {
671: roff_reset(curp->roff);
1.150 schwarze 672: roff_man_reset(curp->man);
1.198 schwarze 673: free_buf_list(curp->secondary);
674: curp->secondary = NULL;
1.160 schwarze 675: free(curp->sodest);
676: curp->sodest = NULL;
1.159 schwarze 677: curp->gzip = 0;
1.1 kristaps 678: }
679:
680: void
681: mparse_free(struct mparse *curp)
682: {
1.164 schwarze 683: roffhash_free(curp->man->mdocmac);
684: roffhash_free(curp->man->manmac);
1.137 schwarze 685: roff_man_free(curp->man);
1.160 schwarze 686: roff_free(curp->roff);
1.198 schwarze 687: free_buf_list(curp->secondary);
1.45 schwarze 688: free(curp->sodest);
1.1 kristaps 689: free(curp);
690: }
691:
692: void
1.135 schwarze 693: mparse_result(struct mparse *curp, struct roff_man **man,
694: char **sodest)
1.1 kristaps 695: {
696:
1.45 schwarze 697: if (sodest && NULL != (*sodest = curp->sodest)) {
698: *man = NULL;
699: return;
700: }
1.9 kristaps 701: if (man)
702: *man = curp->man;
1.24 kristaps 703: }
704:
705: void
1.198 schwarze 706: mparse_copy(const struct mparse *p)
1.24 kristaps 707: {
1.198 schwarze 708: struct buf *buf;
1.24 kristaps 709:
1.198 schwarze 710: for (buf = p->secondary; buf != NULL; buf = buf->next)
711: puts(buf->buf);
1.1 kristaps 712: }
CVSweb