Annotation of mandoc/read.c, Revision 1.103
1.103 ! schwarze 1: /* $Id: read.c,v 1.102 2014/11/30 02:36:38 schwarze Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.40 schwarze 4: * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
1.42 schwarze 5: * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
1.1 kristaps 6: *
7: * Permission to use, copy, modify, and distribute this software for any
8: * purpose with or without fee is hereby granted, provided that the above
9: * copyright notice and this permission notice appear in all copies.
10: *
11: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18: */
1.11 kristaps 19: #include "config.h"
20:
1.80 schwarze 21: #include <sys/types.h>
1.81 schwarze 22: #if HAVE_MMAP
1.82 schwarze 23: #include <sys/mman.h>
1.80 schwarze 24: #include <sys/stat.h>
1.15 kristaps 25: #endif
1.82 schwarze 26: #include <sys/wait.h>
1.1 kristaps 27:
28: #include <assert.h>
29: #include <ctype.h>
1.40 schwarze 30: #include <errno.h>
1.1 kristaps 31: #include <fcntl.h>
1.3 kristaps 32: #include <stdarg.h>
1.28 joerg 33: #include <stdint.h>
1.1 kristaps 34: #include <stdio.h>
35: #include <stdlib.h>
36: #include <string.h>
37: #include <unistd.h>
38:
39: #include "mandoc.h"
1.46 schwarze 40: #include "mandoc_aux.h"
1.3 kristaps 41: #include "libmandoc.h"
1.1 kristaps 42: #include "mdoc.h"
43: #include "man.h"
1.28 joerg 44: #include "main.h"
1.1 kristaps 45:
46: #define REPARSE_LIMIT 1000
47:
48: struct mparse {
49: struct man *pman; /* persistent man parser */
50: struct mdoc *pmdoc; /* persistent mdoc parser */
51: struct man *man; /* man parser */
52: struct mdoc *mdoc; /* mdoc parser */
53: struct roff *roff; /* roff parser (!NULL) */
1.94 schwarze 54: const struct mchars *mchars; /* character table */
1.45 schwarze 55: char *sodest; /* filename pointed to by .so */
1.83 schwarze 56: const char *file; /* filename of current input file */
57: struct buf *primary; /* buffer currently being parsed */
58: struct buf *secondary; /* preprocessed copy of input */
59: const char *defos; /* default operating system */
60: mandocmsg mmsg; /* warning/error message handler */
61: enum mandoclevel file_status; /* status of current parse */
62: enum mandoclevel wlevel; /* ignore messages below this */
63: int options; /* parser options */
1.93 schwarze 64: int filenc; /* encoding of the current file */
1.1 kristaps 65: int reparse_count; /* finite interp. stack */
1.83 schwarze 66: int line; /* line number in the file */
1.97 schwarze 67: pid_t child; /* the gunzip(1) process */
1.1 kristaps 68: };
69:
/* Helpers private to this file; definitions follow below. */
static	void	  choose_parser(struct mparse *curp);
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
1.1 kristaps 78:
1.3 kristaps 79: static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
80: MANDOCERR_OK,
81: MANDOCERR_WARNING,
82: MANDOCERR_WARNING,
83: MANDOCERR_ERROR,
84: MANDOCERR_FATAL,
85: MANDOCERR_MAX,
86: MANDOCERR_MAX
87: };
88:
1.7 kristaps 89: static const char * const mandocerrs[MANDOCERR_MAX] = {
90: "ok",
91:
92: "generic warning",
93:
94: /* related to the prologue */
1.79 schwarze 95: "missing manual title, using UNTITLED",
96: "missing manual title, using \"\"",
1.54 schwarze 97: "lower case character in document title",
1.79 schwarze 98: "missing manual section, using \"\"",
1.7 kristaps 99: "unknown manual section",
1.54 schwarze 100: "missing date, using today's date",
1.7 kristaps 101: "cannot parse date, using it verbatim",
1.79 schwarze 102: "missing Os macro, using \"\"",
103: "duplicate prologue macro",
104: "late prologue macro",
105: "skipping late title macro",
1.7 kristaps 106: "prologue macros out of order",
107:
108: /* related to document structure */
109: ".so is fragile, better use ln(1)",
1.50 schwarze 110: "no document body",
1.54 schwarze 111: "content before first section header",
112: "first section is not \"NAME\"",
1.7 kristaps 113: "bad NAME section contents",
114: "sections out of conventional order",
1.54 schwarze 115: "duplicate section title",
116: "unexpected section",
1.87 schwarze 117: "unusual Xr order",
118: "unusual Xr punctuation",
1.86 schwarze 119: "AUTHORS section without An macro",
1.7 kristaps 120:
121: /* related to macros and nesting */
1.55 schwarze 122: "obsolete macro",
1.102 schwarze 123: "macro neither callable nor escaped",
1.7 kristaps 124: "skipping paragraph macro",
1.31 schwarze 125: "moving paragraph macro out of list",
1.7 kristaps 126: "skipping no-space macro",
127: "blocks badly nested",
128: "nested displays are not portable",
1.57 schwarze 129: "moving content out of list",
130: ".Vt block has child macro",
1.78 schwarze 131: "fill mode already enabled, skipping",
132: "fill mode already disabled, skipping",
1.7 kristaps 133: "line scope broken",
134:
135: /* related to missing macro arguments */
1.58 schwarze 136: "skipping empty request",
137: "conditional request controls empty scope",
1.7 kristaps 138: "skipping empty macro",
1.62 schwarze 139: "empty argument, using 0n",
1.7 kristaps 140: "argument count wrong",
1.60 schwarze 141: "missing display type, using -ragged",
142: "list type is not the first argument",
143: "missing -width in -tag list, using 8n",
1.78 schwarze 144: "missing utility name, using \"\"",
1.60 schwarze 145: "empty head in list item",
146: "empty list item",
1.61 schwarze 147: "missing font type, using \\fR",
148: "unknown font type, using \\fR",
1.103 ! schwarze 149: "nothing follows prefix",
1.60 schwarze 150: "missing -std argument, adding it",
1.90 schwarze 151: "missing eqn box, using \"\"",
1.7 kristaps 152:
153: /* related to bad macro arguments */
1.64 schwarze 154: "unterminated quoted argument",
1.7 kristaps 155: "duplicate argument",
1.76 schwarze 156: "skipping duplicate argument",
1.63 schwarze 157: "skipping duplicate display type",
158: "skipping duplicate list type",
1.76 schwarze 159: "skipping -width argument",
1.7 kristaps 160: "unknown AT&T UNIX version",
1.88 schwarze 161: "comma in function argument",
1.89 schwarze 162: "parenthesis in function name",
1.67 schwarze 163: "invalid content in Rs block",
1.63 schwarze 164: "invalid Boolean argument",
165: "unknown font, skipping request",
1.7 kristaps 166:
167: /* related to plain text */
1.64 schwarze 168: "blank line in fill mode, using .sp",
169: "tab in filled text",
170: "whitespace at end of input line",
1.7 kristaps 171: "bad comment style",
1.64 schwarze 172: "invalid escape sequence",
173: "undefined string, using \"\"",
1.16 kristaps 174:
1.7 kristaps 175: "generic error",
1.17 kristaps 176:
177: /* related to equations */
1.20 kristaps 178: "unexpected equation scope closure",
179: "equation scope open on exit",
1.21 kristaps 180: "overlapping equation scopes",
181: "unexpected end of equation",
1.7 kristaps 182:
183: /* related to tables */
184: "bad table syntax",
185: "bad table option",
186: "bad table layout",
187: "no table layout cells specified",
188: "no table data cells specified",
189: "ignore data in cell",
190: "data block still open",
191: "ignoring extra data cells",
192:
1.68 schwarze 193: /* related to document structure and macros */
1.7 kristaps 194: "input stack limit exceeded, infinite loop?",
195: "skipping bad character",
1.68 schwarze 196: "skipping unknown macro",
1.73 schwarze 197: "skipping item outside list",
1.68 schwarze 198: "skipping column outside column list",
199: "skipping end of block that is not open",
200: "inserting missing end of block",
201: "appending missing end of block",
202:
203: /* related to request and macro arguments */
1.7 kristaps 204: "escaped character not allowed in a name",
205: "argument count wrong",
1.99 schwarze 206: "NOT IMPLEMENTED: Bd -file",
1.71 schwarze 207: "missing list type, using -item",
1.70 schwarze 208: "missing manual name, using \"\"",
1.71 schwarze 209: "uname(3) system call failed, using UNKNOWN",
1.63 schwarze 210: "unknown standard specifier",
1.71 schwarze 211: "skipping request without numeric argument",
1.61 schwarze 212: "skipping all arguments",
213: "skipping excess arguments",
1.92 kristaps 214: "divide by zero",
1.7 kristaps 215:
216: "generic fatal error",
217:
1.40 schwarze 218: "input too large",
1.7 kristaps 219: "NOT IMPLEMENTED: .so with absolute path or \"..\"",
1.52 schwarze 220: ".so request failed",
1.40 schwarze 221:
222: /* system errors */
1.82 schwarze 223: "cannot dup file descriptor",
224: "cannot exec",
225: "gunzip failed with code",
226: "cannot fork",
1.51 schwarze 227: NULL,
1.82 schwarze 228: "cannot open pipe",
229: "cannot read file",
230: "gunzip died from signal",
1.40 schwarze 231: "cannot stat file",
1.82 schwarze 232: "wait failed",
1.7 kristaps 233: };
234:
235: static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
236: "SUCCESS",
237: "RESERVED",
238: "WARNING",
239: "ERROR",
240: "FATAL",
241: "BADARG",
242: "SYSERR"
243: };
244:
1.47 schwarze 245:
1.1 kristaps 246: static void
247: resize_buf(struct buf *buf, size_t initial)
248: {
249:
250: buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
251: buf->buf = mandoc_realloc(buf->buf, buf->sz);
252: }
253:
254: static void
1.84 schwarze 255: choose_parser(struct mparse *curp)
1.1 kristaps 256: {
1.83 schwarze 257: char *cp, *ep;
258: int format;
1.1 kristaps 259:
1.83 schwarze 260: /*
261: * If neither command line arguments -mdoc or -man select
262: * a parser nor the roff parser found a .Dd or .TH macro
263: * yet, look ahead in the main input buffer.
264: */
265:
266: if ((format = roff_getformat(curp->roff)) == 0) {
267: cp = curp->primary->buf;
268: ep = cp + curp->primary->sz;
269: while (cp < ep) {
1.85 schwarze 270: if (*cp == '.' || *cp == '\'') {
1.83 schwarze 271: cp++;
272: if (cp[0] == 'D' && cp[1] == 'd') {
273: format = MPARSE_MDOC;
274: break;
275: }
276: if (cp[0] == 'T' && cp[1] == 'H') {
277: format = MPARSE_MAN;
278: break;
279: }
280: }
281: cp = memchr(cp, '\n', ep - cp);
282: if (cp == NULL)
283: break;
284: cp++;
285: }
1.1 kristaps 286: }
287:
1.83 schwarze 288: if (format == MPARSE_MDOC) {
1.47 schwarze 289: if (NULL == curp->pmdoc)
1.44 schwarze 290: curp->pmdoc = mdoc_alloc(
291: curp->roff, curp, curp->defos,
292: MPARSE_QUICK & curp->options ? 1 : 0);
1.1 kristaps 293: assert(curp->pmdoc);
294: curp->mdoc = curp->pmdoc;
295: return;
1.47 schwarze 296: }
1.1 kristaps 297:
1.83 schwarze 298: /* Fall back to man(7) as a last resort. */
299:
1.47 schwarze 300: if (NULL == curp->pman)
1.44 schwarze 301: curp->pman = man_alloc(curp->roff, curp,
302: MPARSE_QUICK & curp->options ? 1 : 0);
1.1 kristaps 303: assert(curp->pman);
304: curp->man = curp->pman;
305: }
306:
307: /*
1.95 schwarze 308: * Main parse routine for a buffer.
309: * It assumes encoding and line numbering are already set up.
310: * It can recurse directly (for invocations of user-defined
311: * macros, inline equations, and input line traps)
312: * and indirectly (for .so file inclusion).
1.1 kristaps 313: */
314: static void
1.95 schwarze 315: mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
1.1 kristaps 316: {
317: const struct tbl_span *span;
318: struct buf ln;
1.95 schwarze 319: size_t pos; /* byte number in the ln buffer */
1.1 kristaps 320: enum rofferr rr;
1.100 schwarze 321: int of;
1.1 kristaps 322: int lnn; /* line number in the real file */
323: unsigned char c;
324:
1.95 schwarze 325: memset(&ln, 0, sizeof(ln));
1.1 kristaps 326:
1.47 schwarze 327: lnn = curp->line;
328: pos = 0;
1.1 kristaps 329:
1.95 schwarze 330: while (i < blk.sz) {
1.1 kristaps 331: if (0 == pos && '\0' == blk.buf[i])
332: break;
333:
334: if (start) {
335: curp->line = lnn;
336: curp->reparse_count = 0;
1.93 schwarze 337:
338: if (lnn < 3 &&
339: curp->filenc & MPARSE_UTF8 &&
1.95 schwarze 340: curp->filenc & MPARSE_LATIN1)
341: curp->filenc = preconv_cue(&blk, i);
1.1 kristaps 342: }
343:
1.95 schwarze 344: while (i < blk.sz && (start || blk.buf[i] != '\0')) {
1.1 kristaps 345:
346: /*
347: * When finding an unescaped newline character,
348: * leave the character loop to process the line.
349: * Skip a preceding carriage return, if any.
350: */
351:
1.95 schwarze 352: if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
1.1 kristaps 353: '\n' == blk.buf[i + 1])
354: ++i;
355: if ('\n' == blk.buf[i]) {
356: ++i;
357: ++lnn;
358: break;
359: }
360:
1.35 schwarze 361: /*
1.93 schwarze 362: * Make sure we have space for the worst
363: * case of 11 bytes: "\\[u10ffff]\0"
1.35 schwarze 364: */
365:
1.95 schwarze 366: if (pos + 11 > ln.sz)
1.35 schwarze 367: resize_buf(&ln, 256);
368:
1.47 schwarze 369: /*
1.93 schwarze 370: * Encode 8-bit input.
1.1 kristaps 371: */
372:
1.93 schwarze 373: c = blk.buf[i];
374: if (c & 0x80) {
1.95 schwarze 375: if ( ! (curp->filenc && preconv_encode(
376: &blk, &i, &ln, &pos, &curp->filenc))) {
1.93 schwarze 377: mandoc_vmsg(MANDOCERR_BADCHAR,
378: curp, curp->line, pos,
379: "0x%x", c);
380: ln.buf[pos++] = '?';
381: i++;
382: }
383: continue;
384: }
385:
386: /*
387: * Exclude control characters.
388: */
1.1 kristaps 389:
1.93 schwarze 390: if (c == 0x7f || (c < 0x20 && c != 0x09)) {
1.78 schwarze 391: mandoc_vmsg(MANDOCERR_BADCHAR, curp,
392: curp->line, pos, "0x%x", c);
1.1 kristaps 393: i++;
1.27 joerg 394: ln.buf[pos++] = '?';
1.1 kristaps 395: continue;
396: }
397:
398: /* Trailing backslash = a plain char. */
399:
1.95 schwarze 400: if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
1.1 kristaps 401: ln.buf[pos++] = blk.buf[i++];
402: continue;
403: }
404:
405: /*
406: * Found escape and at least one other character.
407: * When it's a newline character, skip it.
408: * When there is a carriage return in between,
409: * skip that one as well.
410: */
411:
1.95 schwarze 412: if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
1.1 kristaps 413: '\n' == blk.buf[i + 2])
414: ++i;
415: if ('\n' == blk.buf[i + 1]) {
416: i += 2;
417: ++lnn;
418: continue;
419: }
420:
1.13 kristaps 421: if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
1.1 kristaps 422: i += 2;
423: /* Comment, skip to end of line */
1.95 schwarze 424: for (; i < blk.sz; ++i) {
1.1 kristaps 425: if ('\n' == blk.buf[i]) {
426: ++i;
427: ++lnn;
428: break;
429: }
430: }
431:
432: /* Backout trailing whitespaces */
433: for (; pos > 0; --pos) {
434: if (ln.buf[pos - 1] != ' ')
435: break;
436: if (pos > 2 && ln.buf[pos - 2] == '\\')
437: break;
438: }
439: break;
440: }
441:
1.35 schwarze 442: /* Catch escaped bogus characters. */
443:
444: c = (unsigned char) blk.buf[i+1];
445:
1.47 schwarze 446: if ( ! (isascii(c) &&
447: (isgraph(c) || isblank(c)))) {
1.78 schwarze 448: mandoc_vmsg(MANDOCERR_BADCHAR, curp,
449: curp->line, pos, "0x%x", c);
1.35 schwarze 450: i += 2;
451: ln.buf[pos++] = '?';
452: continue;
453: }
454:
1.1 kristaps 455: /* Some other escape sequence, copy & cont. */
456:
457: ln.buf[pos++] = blk.buf[i++];
458: ln.buf[pos++] = blk.buf[i++];
459: }
460:
1.95 schwarze 461: if (pos >= ln.sz)
1.1 kristaps 462: resize_buf(&ln, 256);
463:
464: ln.buf[pos] = '\0';
465:
466: /*
467: * A significant amount of complexity is contained by
468: * the roff preprocessor. It's line-oriented but can be
469: * expressed on one line, so we need at times to
470: * readjust our starting point and re-run it. The roff
471: * preprocessor can also readjust the buffers with new
472: * data, so we pass them in wholesale.
473: */
474:
475: of = 0;
476:
1.24 kristaps 477: /*
478: * Maintain a lookaside buffer of all parsed lines. We
479: * only do this if mparse_keep() has been invoked (the
480: * buffer may be accessed with mparse_getkeep()).
481: */
482:
483: if (curp->secondary) {
1.47 schwarze 484: curp->secondary->buf = mandoc_realloc(
485: curp->secondary->buf,
486: curp->secondary->sz + pos + 2);
487: memcpy(curp->secondary->buf +
488: curp->secondary->sz,
489: ln.buf, pos);
1.24 kristaps 490: curp->secondary->sz += pos;
491: curp->secondary->buf
492: [curp->secondary->sz] = '\n';
493: curp->secondary->sz++;
494: curp->secondary->buf
495: [curp->secondary->sz] = '\0';
496: }
1.1 kristaps 497: rerun:
1.96 schwarze 498: rr = roff_parseln(curp->roff, curp->line, &ln, &of);
1.1 kristaps 499:
500: switch (rr) {
1.47 schwarze 501: case ROFF_REPARSE:
1.1 kristaps 502: if (REPARSE_LIMIT >= ++curp->reparse_count)
1.95 schwarze 503: mparse_buf_r(curp, ln, of, 0);
1.1 kristaps 504: else
1.3 kristaps 505: mandoc_msg(MANDOCERR_ROFFLOOP, curp,
1.47 schwarze 506: curp->line, pos, NULL);
1.1 kristaps 507: pos = 0;
508: continue;
1.47 schwarze 509: case ROFF_APPEND:
1.95 schwarze 510: pos = strlen(ln.buf);
1.1 kristaps 511: continue;
1.47 schwarze 512: case ROFF_RERUN:
1.1 kristaps 513: goto rerun;
1.47 schwarze 514: case ROFF_IGN:
1.1 kristaps 515: pos = 0;
516: continue;
1.47 schwarze 517: case ROFF_ERR:
1.1 kristaps 518: assert(MANDOCLEVEL_FATAL <= curp->file_status);
519: break;
1.47 schwarze 520: case ROFF_SO:
1.95 schwarze 521: if ( ! (curp->options & MPARSE_SO) &&
522: (i >= blk.sz || blk.buf[i] == '\0')) {
1.45 schwarze 523: curp->sodest = mandoc_strdup(ln.buf + of);
524: free(ln.buf);
525: return;
526: }
1.24 kristaps 527: /*
528: * We remove `so' clauses from our lookaside
529: * buffer because we're going to descend into
530: * the file recursively.
531: */
1.47 schwarze 532: if (curp->secondary)
1.25 kristaps 533: curp->secondary->sz -= pos + 1;
1.36 schwarze 534: mparse_readfd(curp, -1, ln.buf + of);
1.52 schwarze 535: if (MANDOCLEVEL_FATAL <= curp->file_status) {
536: mandoc_vmsg(MANDOCERR_SO_FAIL,
537: curp, curp->line, pos,
538: ".so %s", ln.buf + of);
1.1 kristaps 539: break;
1.52 schwarze 540: }
1.1 kristaps 541: pos = 0;
542: continue;
543: default:
544: break;
545: }
546:
547: /*
548: * If we encounter errors in the recursive parse, make
549: * sure we don't continue parsing.
550: */
551:
552: if (MANDOCLEVEL_FATAL <= curp->file_status)
553: break;
554:
555: /*
556: * If input parsers have not been allocated, do so now.
1.14 kristaps 557: * We keep these instanced between parsers, but set them
1.1 kristaps 558: * locally per parse routine since we can use different
559: * parsers with each one.
560: */
561:
562: if ( ! (curp->man || curp->mdoc))
1.84 schwarze 563: choose_parser(curp);
1.1 kristaps 564:
1.47 schwarze 565: /*
1.84 schwarze 566: * Lastly, push down into the parsers themselves.
1.1 kristaps 567: * If libroff returns ROFF_TBL, then add it to the
568: * currently open parse. Since we only get here if
569: * there does exist data (see tbl_data.c), we're
570: * guaranteed that something's been allocated.
571: * Do the same for ROFF_EQN.
572: */
573:
1.100 schwarze 574: if (rr == ROFF_TBL) {
575: while ((span = roff_span(curp->roff)) != NULL)
576: if (curp->man == NULL)
577: mdoc_addspan(curp->mdoc, span);
578: else
579: man_addspan(curp->man, span);
580: } else if (rr == ROFF_EQN) {
581: if (curp->man == NULL)
582: mdoc_addeqn(curp->mdoc, roff_eqn(curp->roff));
583: else
584: man_addeqn(curp->man, roff_eqn(curp->roff));
585: } else if ((curp->man == NULL ?
586: mdoc_parseln(curp->mdoc, curp->line, ln.buf, of) :
587: man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
588: break;
1.1 kristaps 589:
590: /* Temporary buffers typically are not full. */
591:
592: if (0 == start && '\0' == blk.buf[i])
593: break;
594:
595: /* Start the next input line. */
596:
597: pos = 0;
598: }
599:
600: free(ln.buf);
601: }
602:
603: static int
1.40 schwarze 604: read_whole_file(struct mparse *curp, const char *file, int fd,
605: struct buf *fb, int *with_mmap)
1.1 kristaps 606: {
607: size_t off;
608: ssize_t ssz;
609:
1.81 schwarze 610: #if HAVE_MMAP
1.15 kristaps 611: struct stat st;
1.1 kristaps 612: if (-1 == fstat(fd, &st)) {
1.40 schwarze 613: curp->file_status = MANDOCLEVEL_SYSERR;
614: if (curp->mmsg)
615: (*curp->mmsg)(MANDOCERR_SYSSTAT, curp->file_status,
616: file, 0, 0, strerror(errno));
1.1 kristaps 617: return(0);
618: }
619:
620: /*
621: * If we're a regular file, try just reading in the whole entry
622: * via mmap(). This is faster than reading it into blocks, and
623: * since each file is only a few bytes to begin with, I'm not
624: * concerned that this is going to tank any machines.
625: */
626:
627: if (S_ISREG(st.st_mode)) {
628: if (st.st_size >= (1U << 31)) {
1.40 schwarze 629: curp->file_status = MANDOCLEVEL_FATAL;
630: if (curp->mmsg)
631: (*curp->mmsg)(MANDOCERR_TOOLARGE,
632: curp->file_status, file, 0, 0, NULL);
1.1 kristaps 633: return(0);
634: }
635: *with_mmap = 1;
636: fb->sz = (size_t)st.st_size;
1.37 schwarze 637: fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
1.1 kristaps 638: if (fb->buf != MAP_FAILED)
639: return(1);
640: }
1.15 kristaps 641: #endif
1.1 kristaps 642:
643: /*
644: * If this isn't a regular file (like, say, stdin), then we must
645: * go the old way and just read things in bit by bit.
646: */
647:
648: *with_mmap = 0;
649: off = 0;
650: fb->sz = 0;
651: fb->buf = NULL;
652: for (;;) {
653: if (off == fb->sz) {
654: if (fb->sz == (1U << 31)) {
1.40 schwarze 655: curp->file_status = MANDOCLEVEL_FATAL;
656: if (curp->mmsg)
657: (*curp->mmsg)(MANDOCERR_TOOLARGE,
658: curp->file_status,
659: file, 0, 0, NULL);
1.1 kristaps 660: break;
661: }
662: resize_buf(fb, 65536);
663: }
664: ssz = read(fd, fb->buf + (int)off, fb->sz - off);
665: if (ssz == 0) {
666: fb->sz = off;
667: return(1);
668: }
669: if (ssz == -1) {
1.40 schwarze 670: curp->file_status = MANDOCLEVEL_SYSERR;
671: if (curp->mmsg)
672: (*curp->mmsg)(MANDOCERR_SYSREAD,
673: curp->file_status, file, 0, 0,
674: strerror(errno));
1.1 kristaps 675: break;
676: }
677: off += (size_t)ssz;
678: }
679:
680: free(fb->buf);
681: fb->buf = NULL;
682: return(0);
683: }
684:
685: static void
686: mparse_end(struct mparse *curp)
687: {
688:
689: if (MANDOCLEVEL_FATAL <= curp->file_status)
690: return;
691:
1.72 schwarze 692: if (curp->mdoc == NULL &&
693: curp->man == NULL &&
694: curp->sodest == NULL) {
695: if (curp->options & MPARSE_MDOC)
696: curp->mdoc = curp->pmdoc;
697: else {
698: if (curp->pman == NULL)
699: curp->pman = man_alloc(curp->roff, curp,
700: curp->options & MPARSE_QUICK ? 1 : 0);
701: curp->man = curp->pman;
702: }
703: }
704:
1.1 kristaps 705: if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) {
706: assert(MANDOCLEVEL_FATAL <= curp->file_status);
707: return;
708: }
709:
710: if (curp->man && ! man_endparse(curp->man)) {
711: assert(MANDOCLEVEL_FATAL <= curp->file_status);
712: return;
713: }
714:
715: roff_endparse(curp->roff);
716: }
717:
718: static void
1.36 schwarze 719: mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
1.28 joerg 720: {
1.85 schwarze 721: struct buf *svprimary;
1.28 joerg 722: const char *svfile;
1.95 schwarze 723: size_t offset;
1.36 schwarze 724: static int recursion_depth;
725:
726: if (64 < recursion_depth) {
727: mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
728: return;
729: }
1.28 joerg 730:
731: /* Line number is per-file. */
732: svfile = curp->file;
733: curp->file = file;
1.85 schwarze 734: svprimary = curp->primary;
1.83 schwarze 735: curp->primary = &blk;
1.28 joerg 736: curp->line = 1;
1.36 schwarze 737: recursion_depth++;
1.28 joerg 738:
1.93 schwarze 739: /* Skip an UTF-8 byte order mark. */
740: if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
741: (unsigned char)blk.buf[0] == 0xef &&
742: (unsigned char)blk.buf[1] == 0xbb &&
743: (unsigned char)blk.buf[2] == 0xbf) {
1.95 schwarze 744: offset = 3;
1.93 schwarze 745: curp->filenc &= ~MPARSE_LATIN1;
1.95 schwarze 746: } else
747: offset = 0;
1.93 schwarze 748:
1.95 schwarze 749: mparse_buf_r(curp, blk, offset, 1);
1.28 joerg 750:
1.36 schwarze 751: if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status)
1.28 joerg 752: mparse_end(curp);
753:
1.85 schwarze 754: curp->primary = svprimary;
1.28 joerg 755: curp->file = svfile;
756: }
757:
758: enum mandoclevel
759: mparse_readmem(struct mparse *curp, const void *buf, size_t len,
760: const char *file)
761: {
762: struct buf blk;
763:
764: blk.buf = UNCONST(buf);
765: blk.sz = len;
766:
1.36 schwarze 767: mparse_parse_buffer(curp, blk, file);
1.28 joerg 768: return(curp->file_status);
769: }
770:
1.98 schwarze 771: /*
772: * If a file descriptor is given, use it and assume it points
773: * to the named file. Otherwise, open the named file.
774: * Read the whole file into memory and call the parsers.
775: * Called recursively when an .so request is encountered.
776: */
1.36 schwarze 777: enum mandoclevel
778: mparse_readfd(struct mparse *curp, int fd, const char *file)
1.1 kristaps 779: {
1.28 joerg 780: struct buf blk;
781: int with_mmap;
1.93 schwarze 782: int save_filenc;
1.98 schwarze 783: pid_t save_child;
1.1 kristaps 784:
1.98 schwarze 785: save_child = curp->child;
786: if (fd != -1)
787: curp->child = 0;
788: else if (mparse_open(curp, &fd, file) >= MANDOCLEVEL_SYSERR)
789: goto out;
1.1 kristaps 790:
1.91 schwarze 791: if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
1.93 schwarze 792: save_filenc = curp->filenc;
793: curp->filenc = curp->options &
794: (MPARSE_UTF8 | MPARSE_LATIN1);
1.91 schwarze 795: mparse_parse_buffer(curp, blk, file);
1.93 schwarze 796: curp->filenc = save_filenc;
1.81 schwarze 797: #if HAVE_MMAP
1.91 schwarze 798: if (with_mmap)
799: munmap(blk.buf, blk.sz);
800: else
1.28 joerg 801: #endif
1.91 schwarze 802: free(blk.buf);
803: }
1.1 kristaps 804:
1.98 schwarze 805: if (fd != STDIN_FILENO && close(fd) == -1)
1.1 kristaps 806: perror(file);
1.91 schwarze 807:
1.98 schwarze 808: mparse_wait(curp);
809: out:
810: curp->child = save_child;
1.1 kristaps 811: return(curp->file_status);
1.82 schwarze 812: }
813:
814: enum mandoclevel
1.97 schwarze 815: mparse_open(struct mparse *curp, int *fd, const char *file)
1.82 schwarze 816: {
817: int pfd[2];
1.98 schwarze 818: int save_errno;
1.82 schwarze 819: char *cp;
820: enum mandocerr err;
821:
822: pfd[1] = -1;
823: curp->file = file;
1.98 schwarze 824:
825: /* Unless zipped, try to just open the file. */
826:
1.82 schwarze 827: if ((cp = strrchr(file, '.')) == NULL ||
828: strcmp(cp + 1, "gz")) {
1.97 schwarze 829: curp->child = 0;
1.98 schwarze 830: if ((*fd = open(file, O_RDONLY)) != -1)
831: return(MANDOCLEVEL_OK);
832:
833: /* Open failed; try to append ".gz". */
834:
835: mandoc_asprintf(&cp, "%s.gz", file);
836: file = cp;
837: } else
838: cp = NULL;
839:
840: /* Before forking, make sure the file can be read. */
841:
842: save_errno = errno;
843: if (access(file, R_OK) == -1) {
844: if (cp != NULL)
845: errno = save_errno;
846: err = MANDOCERR_SYSOPEN;
847: goto out;
1.82 schwarze 848: }
849:
1.98 schwarze 850: /* Run gunzip(1). */
851:
1.82 schwarze 852: if (pipe(pfd) == -1) {
853: err = MANDOCERR_SYSPIPE;
854: goto out;
855: }
856:
1.97 schwarze 857: switch (curp->child = fork()) {
1.82 schwarze 858: case -1:
859: err = MANDOCERR_SYSFORK;
860: close(pfd[0]);
861: close(pfd[1]);
862: pfd[1] = -1;
863: break;
864: case 0:
865: close(pfd[0]);
866: if (dup2(pfd[1], STDOUT_FILENO) == -1) {
867: err = MANDOCERR_SYSDUP;
868: break;
869: }
870: execlp("gunzip", "gunzip", "-c", file, NULL);
871: err = MANDOCERR_SYSEXEC;
872: break;
873: default:
874: close(pfd[1]);
875: *fd = pfd[0];
876: return(MANDOCLEVEL_OK);
877: }
878:
879: out:
1.98 schwarze 880: free(cp);
1.82 schwarze 881: *fd = -1;
1.97 schwarze 882: curp->child = 0;
1.82 schwarze 883: curp->file_status = MANDOCLEVEL_SYSERR;
884: if (curp->mmsg)
1.98 schwarze 885: (*curp->mmsg)(err, curp->file_status, curp->file,
1.82 schwarze 886: 0, 0, strerror(errno));
887: if (pfd[1] != -1)
888: exit(1);
889: return(curp->file_status);
890: }
891:
892: enum mandoclevel
1.97 schwarze 893: mparse_wait(struct mparse *curp)
1.82 schwarze 894: {
895: int status;
896:
1.97 schwarze 897: if (curp->child == 0)
898: return(MANDOCLEVEL_OK);
899:
900: if (waitpid(curp->child, &status, 0) == -1) {
1.82 schwarze 901: mandoc_msg(MANDOCERR_SYSWAIT, curp, 0, 0,
902: strerror(errno));
903: curp->file_status = MANDOCLEVEL_SYSERR;
904: return(curp->file_status);
905: }
906: if (WIFSIGNALED(status)) {
907: mandoc_vmsg(MANDOCERR_SYSSIG, curp, 0, 0,
908: "%d", WTERMSIG(status));
909: curp->file_status = MANDOCLEVEL_SYSERR;
910: return(curp->file_status);
911: }
912: if (WEXITSTATUS(status)) {
913: mandoc_vmsg(MANDOCERR_SYSEXIT, curp, 0, 0,
914: "%d", WEXITSTATUS(status));
915: curp->file_status = MANDOCLEVEL_SYSERR;
916: return(curp->file_status);
917: }
918: return(MANDOCLEVEL_OK);
1.1 kristaps 919: }
920:
921: struct mparse *
1.94 schwarze 922: mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
923: const struct mchars *mchars, const char *defos)
1.1 kristaps 924: {
925: struct mparse *curp;
1.10 kristaps 926:
927: assert(wlevel <= MANDOCLEVEL_FATAL);
1.1 kristaps 928:
929: curp = mandoc_calloc(1, sizeof(struct mparse));
930:
1.44 schwarze 931: curp->options = options;
1.3 kristaps 932: curp->wlevel = wlevel;
1.1 kristaps 933: curp->mmsg = mmsg;
1.29 schwarze 934: curp->defos = defos;
1.1 kristaps 935:
1.94 schwarze 936: curp->mchars = mchars;
937: curp->roff = roff_alloc(curp, curp->mchars, options);
1.72 schwarze 938: if (curp->options & MPARSE_MDOC)
939: curp->pmdoc = mdoc_alloc(
940: curp->roff, curp, curp->defos,
941: curp->options & MPARSE_QUICK ? 1 : 0);
942: if (curp->options & MPARSE_MAN)
943: curp->pman = man_alloc(curp->roff, curp,
944: curp->options & MPARSE_QUICK ? 1 : 0);
945:
1.1 kristaps 946: return(curp);
947: }
948:
949: void
950: mparse_reset(struct mparse *curp)
951: {
952:
953: roff_reset(curp->roff);
954:
955: if (curp->mdoc)
956: mdoc_reset(curp->mdoc);
957: if (curp->man)
958: man_reset(curp->man);
1.24 kristaps 959: if (curp->secondary)
960: curp->secondary->sz = 0;
1.1 kristaps 961:
962: curp->file_status = MANDOCLEVEL_OK;
963: curp->mdoc = NULL;
964: curp->man = NULL;
1.45 schwarze 965:
966: free(curp->sodest);
967: curp->sodest = NULL;
1.1 kristaps 968: }
969:
970: void
971: mparse_free(struct mparse *curp)
972: {
973:
974: if (curp->pmdoc)
975: mdoc_free(curp->pmdoc);
976: if (curp->pman)
977: man_free(curp->pman);
978: if (curp->roff)
979: roff_free(curp->roff);
1.24 kristaps 980: if (curp->secondary)
981: free(curp->secondary->buf);
1.1 kristaps 982:
1.24 kristaps 983: free(curp->secondary);
1.45 schwarze 984: free(curp->sodest);
1.1 kristaps 985: free(curp);
986: }
987:
988: void
1.45 schwarze 989: mparse_result(struct mparse *curp,
990: struct mdoc **mdoc, struct man **man, char **sodest)
1.1 kristaps 991: {
992:
1.45 schwarze 993: if (sodest && NULL != (*sodest = curp->sodest)) {
994: *mdoc = NULL;
995: *man = NULL;
996: return;
997: }
1.9 kristaps 998: if (mdoc)
999: *mdoc = curp->mdoc;
1000: if (man)
1001: *man = curp->man;
1.3 kristaps 1002: }
1003:
1004: void
1005: mandoc_vmsg(enum mandocerr t, struct mparse *m,
1006: int ln, int pos, const char *fmt, ...)
1007: {
1008: char buf[256];
1009: va_list ap;
1010:
1011: va_start(ap, fmt);
1.48 schwarze 1012: (void)vsnprintf(buf, sizeof(buf), fmt, ap);
1.3 kristaps 1013: va_end(ap);
1014:
1015: mandoc_msg(t, m, ln, pos, buf);
1016: }
1017:
1018: void
1.47 schwarze 1019: mandoc_msg(enum mandocerr er, struct mparse *m,
1.3 kristaps 1020: int ln, int col, const char *msg)
1021: {
1022: enum mandoclevel level;
1023:
1024: level = MANDOCLEVEL_FATAL;
1025: while (er < mandoclimits[level])
1026: level--;
1027:
1028: if (level < m->wlevel)
1029: return;
1030:
1.8 kristaps 1031: if (m->mmsg)
1032: (*m->mmsg)(er, level, m->file, ln, col, msg);
1.3 kristaps 1033:
1034: if (m->file_status < level)
1035: m->file_status = level;
1.7 kristaps 1036: }
1037:
1038: const char *
1039: mparse_strerror(enum mandocerr er)
1040: {
1041:
1042: return(mandocerrs[er]);
1043: }
1044:
1045: const char *
1046: mparse_strlevel(enum mandoclevel lvl)
1047: {
1048: return(mandoclevels[lvl]);
1.24 kristaps 1049: }
1050:
1051: void
1052: mparse_keep(struct mparse *p)
1053: {
1054:
1055: assert(NULL == p->secondary);
1056: p->secondary = mandoc_calloc(1, sizeof(struct buf));
1057: }
1058:
1059: const char *
1060: mparse_getkeep(const struct mparse *p)
1061: {
1062:
1063: assert(p->secondary);
1064: return(p->secondary->sz ? p->secondary->buf : NULL);
1.1 kristaps 1065: }
CVSweb