Annotation of mandoc/read.c, Revision 1.93
1.93 ! schwarze 1: /* $Id: read.c,v 1.92 2014/10/20 19:04:45 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.40 schwarze 4: * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
1.42 schwarze 5: * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
1.1 kristaps 6: *
7: * Permission to use, copy, modify, and distribute this software for any
8: * purpose with or without fee is hereby granted, provided that the above
9: * copyright notice and this permission notice appear in all copies.
10: *
11: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18: */
1.11 kristaps 19: #include "config.h"
20:
1.80 schwarze 21: #include <sys/types.h>
1.81 schwarze 22: #if HAVE_MMAP
1.82 schwarze 23: #include <sys/mman.h>
1.80 schwarze 24: #include <sys/stat.h>
1.15 kristaps 25: #endif
1.82 schwarze 26: #include <sys/wait.h>
1.1 kristaps 27:
28: #include <assert.h>
29: #include <ctype.h>
1.40 schwarze 30: #include <errno.h>
1.1 kristaps 31: #include <fcntl.h>
1.3 kristaps 32: #include <stdarg.h>
1.28 joerg 33: #include <stdint.h>
1.1 kristaps 34: #include <stdio.h>
35: #include <stdlib.h>
36: #include <string.h>
37: #include <unistd.h>
38:
39: #include "mandoc.h"
1.46 schwarze 40: #include "mandoc_aux.h"
1.3 kristaps 41: #include "libmandoc.h"
1.1 kristaps 42: #include "mdoc.h"
43: #include "man.h"
1.28 joerg 44: #include "main.h"
1.1 kristaps 45:
46: #define REPARSE_LIMIT 1000
47:
48: struct mparse {
49: struct man *pman; /* persistent man parser */
50: struct mdoc *pmdoc; /* persistent mdoc parser */
51: struct man *man; /* man parser */
52: struct mdoc *mdoc; /* mdoc parser */
53: struct roff *roff; /* roff parser (!NULL) */
1.45 schwarze 54: char *sodest; /* filename pointed to by .so */
1.83 schwarze 55: const char *file; /* filename of current input file */
56: struct buf *primary; /* buffer currently being parsed */
57: struct buf *secondary; /* preprocessed copy of input */
58: const char *defos; /* default operating system */
59: mandocmsg mmsg; /* warning/error message handler */
60: enum mandoclevel file_status; /* status of current parse */
61: enum mandoclevel wlevel; /* ignore messages below this */
62: int options; /* parser options */
1.93 ! schwarze 63: int filenc; /* encoding of the current file */
1.1 kristaps 64: int reparse_count; /* finite interp. stack */
1.83 schwarze 65: int line; /* line number in the file */
1.1 kristaps 66: };
67:
/* Helpers private to this file. */
static	void	  choose_parser(struct mparse *curp);
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			int start);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
1.1 kristaps 76:
1.3 kristaps 77: static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
78: MANDOCERR_OK,
79: MANDOCERR_WARNING,
80: MANDOCERR_WARNING,
81: MANDOCERR_ERROR,
82: MANDOCERR_FATAL,
83: MANDOCERR_MAX,
84: MANDOCERR_MAX
85: };
86:
1.7 kristaps 87: static const char * const mandocerrs[MANDOCERR_MAX] = {
88: "ok",
89:
90: "generic warning",
91:
92: /* related to the prologue */
1.79 schwarze 93: "missing manual title, using UNTITLED",
94: "missing manual title, using \"\"",
1.54 schwarze 95: "lower case character in document title",
1.79 schwarze 96: "missing manual section, using \"\"",
1.7 kristaps 97: "unknown manual section",
1.32 schwarze 98: "unknown manual volume or arch",
1.54 schwarze 99: "missing date, using today's date",
1.7 kristaps 100: "cannot parse date, using it verbatim",
1.79 schwarze 101: "missing Os macro, using \"\"",
102: "duplicate prologue macro",
103: "late prologue macro",
104: "skipping late title macro",
1.7 kristaps 105: "prologue macros out of order",
106:
107: /* related to document structure */
108: ".so is fragile, better use ln(1)",
1.50 schwarze 109: "no document body",
1.54 schwarze 110: "content before first section header",
111: "first section is not \"NAME\"",
1.7 kristaps 112: "bad NAME section contents",
113: "sections out of conventional order",
1.54 schwarze 114: "duplicate section title",
115: "unexpected section",
1.87 schwarze 116: "unusual Xr order",
117: "unusual Xr punctuation",
1.86 schwarze 118: "AUTHORS section without An macro",
1.7 kristaps 119:
120: /* related to macros and nesting */
1.55 schwarze 121: "obsolete macro",
1.7 kristaps 122: "skipping paragraph macro",
1.31 schwarze 123: "moving paragraph macro out of list",
1.7 kristaps 124: "skipping no-space macro",
125: "blocks badly nested",
126: "nested displays are not portable",
1.57 schwarze 127: "moving content out of list",
128: ".Vt block has child macro",
1.78 schwarze 129: "fill mode already enabled, skipping",
130: "fill mode already disabled, skipping",
1.7 kristaps 131: "line scope broken",
132:
133: /* related to missing macro arguments */
1.58 schwarze 134: "skipping empty request",
135: "conditional request controls empty scope",
1.7 kristaps 136: "skipping empty macro",
1.62 schwarze 137: "empty argument, using 0n",
1.7 kristaps 138: "argument count wrong",
1.60 schwarze 139: "missing display type, using -ragged",
140: "list type is not the first argument",
141: "missing -width in -tag list, using 8n",
1.78 schwarze 142: "missing utility name, using \"\"",
1.60 schwarze 143: "empty head in list item",
144: "empty list item",
1.61 schwarze 145: "missing font type, using \\fR",
146: "unknown font type, using \\fR",
1.60 schwarze 147: "missing -std argument, adding it",
1.90 schwarze 148: "missing eqn box, using \"\"",
1.7 kristaps 149:
150: /* related to bad macro arguments */
1.64 schwarze 151: "unterminated quoted argument",
1.7 kristaps 152: "duplicate argument",
1.76 schwarze 153: "skipping duplicate argument",
1.63 schwarze 154: "skipping duplicate display type",
155: "skipping duplicate list type",
1.76 schwarze 156: "skipping -width argument",
1.7 kristaps 157: "unknown AT&T UNIX version",
1.88 schwarze 158: "comma in function argument",
1.89 schwarze 159: "parenthesis in function name",
1.67 schwarze 160: "invalid content in Rs block",
1.63 schwarze 161: "invalid Boolean argument",
162: "unknown font, skipping request",
1.7 kristaps 163:
164: /* related to plain text */
1.64 schwarze 165: "blank line in fill mode, using .sp",
166: "tab in filled text",
167: "whitespace at end of input line",
1.7 kristaps 168: "bad comment style",
1.64 schwarze 169: "invalid escape sequence",
170: "undefined string, using \"\"",
1.16 kristaps 171:
1.7 kristaps 172: "generic error",
1.17 kristaps 173:
174: /* related to equations */
1.20 kristaps 175: "unexpected equation scope closure",
176: "equation scope open on exit",
1.21 kristaps 177: "overlapping equation scopes",
178: "unexpected end of equation",
1.7 kristaps 179:
180: /* related to tables */
181: "bad table syntax",
182: "bad table option",
183: "bad table layout",
184: "no table layout cells specified",
185: "no table data cells specified",
186: "ignore data in cell",
187: "data block still open",
188: "ignoring extra data cells",
189:
1.68 schwarze 190: /* related to document structure and macros */
1.7 kristaps 191: "input stack limit exceeded, infinite loop?",
192: "skipping bad character",
1.68 schwarze 193: "skipping unknown macro",
1.73 schwarze 194: "skipping item outside list",
1.68 schwarze 195: "skipping column outside column list",
196: "skipping end of block that is not open",
197: "inserting missing end of block",
198: "appending missing end of block",
199:
200: /* related to request and macro arguments */
1.7 kristaps 201: "escaped character not allowed in a name",
202: "argument count wrong",
1.71 schwarze 203: "missing list type, using -item",
1.70 schwarze 204: "missing manual name, using \"\"",
1.71 schwarze 205: "uname(3) system call failed, using UNKNOWN",
1.63 schwarze 206: "unknown standard specifier",
1.71 schwarze 207: "skipping request without numeric argument",
1.61 schwarze 208: "skipping all arguments",
209: "skipping excess arguments",
1.92 kristaps 210: "divide by zero",
1.7 kristaps 211:
212: "generic fatal error",
213:
1.40 schwarze 214: "input too large",
1.78 schwarze 215: "NOT IMPLEMENTED: Bd -file",
1.7 kristaps 216: "NOT IMPLEMENTED: .so with absolute path or \"..\"",
1.52 schwarze 217: ".so request failed",
1.40 schwarze 218:
219: /* system errors */
1.82 schwarze 220: "cannot dup file descriptor",
221: "cannot exec",
222: "gunzip failed with code",
223: "cannot fork",
1.51 schwarze 224: NULL,
1.82 schwarze 225: "cannot open pipe",
226: "cannot read file",
227: "gunzip died from signal",
1.40 schwarze 228: "cannot stat file",
1.82 schwarze 229: "wait failed",
1.7 kristaps 230: };
231:
232: static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
233: "SUCCESS",
234: "RESERVED",
235: "WARNING",
236: "ERROR",
237: "FATAL",
238: "BADARG",
239: "SYSERR"
240: };
241:
1.47 schwarze 242:
1.1 kristaps 243: static void
244: resize_buf(struct buf *buf, size_t initial)
245: {
246:
247: buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
248: buf->buf = mandoc_realloc(buf->buf, buf->sz);
249: }
250:
251: static void
1.84 schwarze 252: choose_parser(struct mparse *curp)
1.1 kristaps 253: {
1.83 schwarze 254: char *cp, *ep;
255: int format;
1.1 kristaps 256:
1.83 schwarze 257: /*
258: * If neither command line arguments -mdoc or -man select
259: * a parser nor the roff parser found a .Dd or .TH macro
260: * yet, look ahead in the main input buffer.
261: */
262:
263: if ((format = roff_getformat(curp->roff)) == 0) {
264: cp = curp->primary->buf;
265: ep = cp + curp->primary->sz;
266: while (cp < ep) {
1.85 schwarze 267: if (*cp == '.' || *cp == '\'') {
1.83 schwarze 268: cp++;
269: if (cp[0] == 'D' && cp[1] == 'd') {
270: format = MPARSE_MDOC;
271: break;
272: }
273: if (cp[0] == 'T' && cp[1] == 'H') {
274: format = MPARSE_MAN;
275: break;
276: }
277: }
278: cp = memchr(cp, '\n', ep - cp);
279: if (cp == NULL)
280: break;
281: cp++;
282: }
1.1 kristaps 283: }
284:
1.83 schwarze 285: if (format == MPARSE_MDOC) {
1.47 schwarze 286: if (NULL == curp->pmdoc)
1.44 schwarze 287: curp->pmdoc = mdoc_alloc(
288: curp->roff, curp, curp->defos,
289: MPARSE_QUICK & curp->options ? 1 : 0);
1.1 kristaps 290: assert(curp->pmdoc);
291: curp->mdoc = curp->pmdoc;
292: return;
1.47 schwarze 293: }
1.1 kristaps 294:
1.83 schwarze 295: /* Fall back to man(7) as a last resort. */
296:
1.47 schwarze 297: if (NULL == curp->pman)
1.44 schwarze 298: curp->pman = man_alloc(curp->roff, curp,
299: MPARSE_QUICK & curp->options ? 1 : 0);
1.1 kristaps 300: assert(curp->pman);
301: curp->man = curp->pman;
302: }
303:
304: /*
305: * Main parse routine for an opened file. This is called for each
306: * opened file and simply loops around the full input file, possibly
307: * nesting (i.e., with `so').
308: */
309: static void
310: mparse_buf_r(struct mparse *curp, struct buf blk, int start)
311: {
312: const struct tbl_span *span;
313: struct buf ln;
314: enum rofferr rr;
315: int i, of, rc;
316: int pos; /* byte number in the ln buffer */
317: int lnn; /* line number in the real file */
318: unsigned char c;
319:
320: memset(&ln, 0, sizeof(struct buf));
321:
1.47 schwarze 322: lnn = curp->line;
323: pos = 0;
1.1 kristaps 324:
1.93 ! schwarze 325: for (i = blk.offs; i < (int)blk.sz; ) {
1.1 kristaps 326: if (0 == pos && '\0' == blk.buf[i])
327: break;
328:
329: if (start) {
330: curp->line = lnn;
331: curp->reparse_count = 0;
1.93 ! schwarze 332:
! 333: if (lnn < 3 &&
! 334: curp->filenc & MPARSE_UTF8 &&
! 335: curp->filenc & MPARSE_LATIN1) {
! 336: blk.offs = i;
! 337: curp->filenc = preconv_cue(&blk);
! 338: }
1.1 kristaps 339: }
340:
341: while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {
342:
343: /*
344: * When finding an unescaped newline character,
345: * leave the character loop to process the line.
346: * Skip a preceding carriage return, if any.
347: */
348:
349: if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz &&
350: '\n' == blk.buf[i + 1])
351: ++i;
352: if ('\n' == blk.buf[i]) {
353: ++i;
354: ++lnn;
355: break;
356: }
357:
1.35 schwarze 358: /*
1.93 ! schwarze 359: * Make sure we have space for the worst
! 360: * case of 11 bytes: "\\[u10ffff]\0"
1.35 schwarze 361: */
362:
1.93 ! schwarze 363: if (pos + 11 > (int)ln.sz)
1.35 schwarze 364: resize_buf(&ln, 256);
365:
1.47 schwarze 366: /*
1.93 ! schwarze 367: * Encode 8-bit input.
1.1 kristaps 368: */
369:
1.93 ! schwarze 370: c = blk.buf[i];
! 371: if (c & 0x80) {
! 372: blk.offs = i;
! 373: ln.offs = pos;
! 374: if (curp->filenc && preconv_encode(
! 375: &blk, &ln, &curp->filenc)) {
! 376: pos = ln.offs;
! 377: i = blk.offs;
! 378: } else {
! 379: mandoc_vmsg(MANDOCERR_BADCHAR,
! 380: curp, curp->line, pos,
! 381: "0x%x", c);
! 382: ln.buf[pos++] = '?';
! 383: i++;
! 384: }
! 385: continue;
! 386: }
! 387:
! 388: /*
! 389: * Exclude control characters.
! 390: */
1.1 kristaps 391:
1.93 ! schwarze 392: if (c == 0x7f || (c < 0x20 && c != 0x09)) {
1.78 schwarze 393: mandoc_vmsg(MANDOCERR_BADCHAR, curp,
394: curp->line, pos, "0x%x", c);
1.1 kristaps 395: i++;
1.27 joerg 396: ln.buf[pos++] = '?';
1.1 kristaps 397: continue;
398: }
399:
400: /* Trailing backslash = a plain char. */
401:
402: if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
403: ln.buf[pos++] = blk.buf[i++];
404: continue;
405: }
406:
407: /*
408: * Found escape and at least one other character.
409: * When it's a newline character, skip it.
410: * When there is a carriage return in between,
411: * skip that one as well.
412: */
413:
414: if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz &&
415: '\n' == blk.buf[i + 2])
416: ++i;
417: if ('\n' == blk.buf[i + 1]) {
418: i += 2;
419: ++lnn;
420: continue;
421: }
422:
1.13 kristaps 423: if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
1.1 kristaps 424: i += 2;
425: /* Comment, skip to end of line */
426: for (; i < (int)blk.sz; ++i) {
427: if ('\n' == blk.buf[i]) {
428: ++i;
429: ++lnn;
430: break;
431: }
432: }
433:
434: /* Backout trailing whitespaces */
435: for (; pos > 0; --pos) {
436: if (ln.buf[pos - 1] != ' ')
437: break;
438: if (pos > 2 && ln.buf[pos - 2] == '\\')
439: break;
440: }
441: break;
442: }
443:
1.35 schwarze 444: /* Catch escaped bogus characters. */
445:
446: c = (unsigned char) blk.buf[i+1];
447:
1.47 schwarze 448: if ( ! (isascii(c) &&
449: (isgraph(c) || isblank(c)))) {
1.78 schwarze 450: mandoc_vmsg(MANDOCERR_BADCHAR, curp,
451: curp->line, pos, "0x%x", c);
1.35 schwarze 452: i += 2;
453: ln.buf[pos++] = '?';
454: continue;
455: }
456:
1.1 kristaps 457: /* Some other escape sequence, copy & cont. */
458:
459: ln.buf[pos++] = blk.buf[i++];
460: ln.buf[pos++] = blk.buf[i++];
461: }
462:
1.47 schwarze 463: if (pos >= (int)ln.sz)
1.1 kristaps 464: resize_buf(&ln, 256);
465:
466: ln.buf[pos] = '\0';
467:
468: /*
469: * A significant amount of complexity is contained by
470: * the roff preprocessor. It's line-oriented but can be
471: * expressed on one line, so we need at times to
472: * readjust our starting point and re-run it. The roff
473: * preprocessor can also readjust the buffers with new
474: * data, so we pass them in wholesale.
475: */
476:
477: of = 0;
478:
1.24 kristaps 479: /*
480: * Maintain a lookaside buffer of all parsed lines. We
481: * only do this if mparse_keep() has been invoked (the
482: * buffer may be accessed with mparse_getkeep()).
483: */
484:
485: if (curp->secondary) {
1.47 schwarze 486: curp->secondary->buf = mandoc_realloc(
487: curp->secondary->buf,
488: curp->secondary->sz + pos + 2);
489: memcpy(curp->secondary->buf +
490: curp->secondary->sz,
491: ln.buf, pos);
1.24 kristaps 492: curp->secondary->sz += pos;
493: curp->secondary->buf
494: [curp->secondary->sz] = '\n';
495: curp->secondary->sz++;
496: curp->secondary->buf
497: [curp->secondary->sz] = '\0';
498: }
1.1 kristaps 499: rerun:
1.47 schwarze 500: rr = roff_parseln(curp->roff, curp->line,
501: &ln.buf, &ln.sz, of, &of);
1.1 kristaps 502:
503: switch (rr) {
1.47 schwarze 504: case ROFF_REPARSE:
1.1 kristaps 505: if (REPARSE_LIMIT >= ++curp->reparse_count)
506: mparse_buf_r(curp, ln, 0);
507: else
1.3 kristaps 508: mandoc_msg(MANDOCERR_ROFFLOOP, curp,
1.47 schwarze 509: curp->line, pos, NULL);
1.1 kristaps 510: pos = 0;
511: continue;
1.47 schwarze 512: case ROFF_APPEND:
1.1 kristaps 513: pos = (int)strlen(ln.buf);
514: continue;
1.47 schwarze 515: case ROFF_RERUN:
1.1 kristaps 516: goto rerun;
1.47 schwarze 517: case ROFF_IGN:
1.1 kristaps 518: pos = 0;
519: continue;
1.47 schwarze 520: case ROFF_ERR:
1.1 kristaps 521: assert(MANDOCLEVEL_FATAL <= curp->file_status);
522: break;
1.47 schwarze 523: case ROFF_SO:
1.45 schwarze 524: if (0 == (MPARSE_SO & curp->options) &&
525: (i >= (int)blk.sz || '\0' == blk.buf[i])) {
526: curp->sodest = mandoc_strdup(ln.buf + of);
527: free(ln.buf);
528: return;
529: }
1.24 kristaps 530: /*
531: * We remove `so' clauses from our lookaside
532: * buffer because we're going to descend into
533: * the file recursively.
534: */
1.47 schwarze 535: if (curp->secondary)
1.25 kristaps 536: curp->secondary->sz -= pos + 1;
1.36 schwarze 537: mparse_readfd(curp, -1, ln.buf + of);
1.52 schwarze 538: if (MANDOCLEVEL_FATAL <= curp->file_status) {
539: mandoc_vmsg(MANDOCERR_SO_FAIL,
540: curp, curp->line, pos,
541: ".so %s", ln.buf + of);
1.1 kristaps 542: break;
1.52 schwarze 543: }
1.1 kristaps 544: pos = 0;
545: continue;
546: default:
547: break;
548: }
549:
550: /*
551: * If we encounter errors in the recursive parse, make
552: * sure we don't continue parsing.
553: */
554:
555: if (MANDOCLEVEL_FATAL <= curp->file_status)
556: break;
557:
558: /*
559: * If input parsers have not been allocated, do so now.
1.14 kristaps 560: * We keep these instanced between parsers, but set them
1.1 kristaps 561: * locally per parse routine since we can use different
562: * parsers with each one.
563: */
564:
565: if ( ! (curp->man || curp->mdoc))
1.84 schwarze 566: choose_parser(curp);
1.1 kristaps 567:
1.47 schwarze 568: /*
1.84 schwarze 569: * Lastly, push down into the parsers themselves.
1.1 kristaps 570: * If libroff returns ROFF_TBL, then add it to the
571: * currently open parse. Since we only get here if
572: * there does exist data (see tbl_data.c), we're
573: * guaranteed that something's been allocated.
574: * Do the same for ROFF_EQN.
575: */
576:
577: rc = -1;
578:
579: if (ROFF_TBL == rr)
580: while (NULL != (span = roff_span(curp->roff))) {
581: rc = curp->man ?
1.47 schwarze 582: man_addspan(curp->man, span) :
583: mdoc_addspan(curp->mdoc, span);
1.1 kristaps 584: if (0 == rc)
585: break;
586: }
587: else if (ROFF_EQN == rr)
1.47 schwarze 588: rc = curp->mdoc ?
589: mdoc_addeqn(curp->mdoc,
590: roff_eqn(curp->roff)) :
591: man_addeqn(curp->man,
592: roff_eqn(curp->roff));
1.1 kristaps 593: else if (curp->man || curp->mdoc)
594: rc = curp->man ?
1.47 schwarze 595: man_parseln(curp->man,
596: curp->line, ln.buf, of) :
597: mdoc_parseln(curp->mdoc,
598: curp->line, ln.buf, of);
1.1 kristaps 599:
600: if (0 == rc) {
601: assert(MANDOCLEVEL_FATAL <= curp->file_status);
602: break;
1.41 schwarze 603: } else if (2 == rc)
604: break;
1.1 kristaps 605:
606: /* Temporary buffers typically are not full. */
607:
608: if (0 == start && '\0' == blk.buf[i])
609: break;
610:
611: /* Start the next input line. */
612:
613: pos = 0;
614: }
615:
616: free(ln.buf);
617: }
618:
619: static int
1.40 schwarze 620: read_whole_file(struct mparse *curp, const char *file, int fd,
621: struct buf *fb, int *with_mmap)
1.1 kristaps 622: {
623: size_t off;
624: ssize_t ssz;
625:
1.81 schwarze 626: #if HAVE_MMAP
1.15 kristaps 627: struct stat st;
1.1 kristaps 628: if (-1 == fstat(fd, &st)) {
1.40 schwarze 629: curp->file_status = MANDOCLEVEL_SYSERR;
630: if (curp->mmsg)
631: (*curp->mmsg)(MANDOCERR_SYSSTAT, curp->file_status,
632: file, 0, 0, strerror(errno));
1.1 kristaps 633: return(0);
634: }
635:
636: /*
637: * If we're a regular file, try just reading in the whole entry
638: * via mmap(). This is faster than reading it into blocks, and
639: * since each file is only a few bytes to begin with, I'm not
640: * concerned that this is going to tank any machines.
641: */
642:
643: if (S_ISREG(st.st_mode)) {
644: if (st.st_size >= (1U << 31)) {
1.40 schwarze 645: curp->file_status = MANDOCLEVEL_FATAL;
646: if (curp->mmsg)
647: (*curp->mmsg)(MANDOCERR_TOOLARGE,
648: curp->file_status, file, 0, 0, NULL);
1.1 kristaps 649: return(0);
650: }
651: *with_mmap = 1;
1.93 ! schwarze 652: fb->offs = 0;
1.1 kristaps 653: fb->sz = (size_t)st.st_size;
1.37 schwarze 654: fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
1.1 kristaps 655: if (fb->buf != MAP_FAILED)
656: return(1);
657: }
1.15 kristaps 658: #endif
1.1 kristaps 659:
660: /*
661: * If this isn't a regular file (like, say, stdin), then we must
662: * go the old way and just read things in bit by bit.
663: */
664:
665: *with_mmap = 0;
666: off = 0;
667: fb->sz = 0;
668: fb->buf = NULL;
669: for (;;) {
670: if (off == fb->sz) {
671: if (fb->sz == (1U << 31)) {
1.40 schwarze 672: curp->file_status = MANDOCLEVEL_FATAL;
673: if (curp->mmsg)
674: (*curp->mmsg)(MANDOCERR_TOOLARGE,
675: curp->file_status,
676: file, 0, 0, NULL);
1.1 kristaps 677: break;
678: }
679: resize_buf(fb, 65536);
680: }
681: ssz = read(fd, fb->buf + (int)off, fb->sz - off);
682: if (ssz == 0) {
683: fb->sz = off;
1.93 ! schwarze 684: fb->offs = 0;
1.1 kristaps 685: return(1);
686: }
687: if (ssz == -1) {
1.40 schwarze 688: curp->file_status = MANDOCLEVEL_SYSERR;
689: if (curp->mmsg)
690: (*curp->mmsg)(MANDOCERR_SYSREAD,
691: curp->file_status, file, 0, 0,
692: strerror(errno));
1.1 kristaps 693: break;
694: }
695: off += (size_t)ssz;
696: }
697:
698: free(fb->buf);
699: fb->buf = NULL;
700: return(0);
701: }
702:
703: static void
704: mparse_end(struct mparse *curp)
705: {
706:
707: if (MANDOCLEVEL_FATAL <= curp->file_status)
708: return;
709:
1.72 schwarze 710: if (curp->mdoc == NULL &&
711: curp->man == NULL &&
712: curp->sodest == NULL) {
713: if (curp->options & MPARSE_MDOC)
714: curp->mdoc = curp->pmdoc;
715: else {
716: if (curp->pman == NULL)
717: curp->pman = man_alloc(curp->roff, curp,
718: curp->options & MPARSE_QUICK ? 1 : 0);
719: curp->man = curp->pman;
720: }
721: }
722:
1.1 kristaps 723: if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) {
724: assert(MANDOCLEVEL_FATAL <= curp->file_status);
725: return;
726: }
727:
728: if (curp->man && ! man_endparse(curp->man)) {
729: assert(MANDOCLEVEL_FATAL <= curp->file_status);
730: return;
731: }
732:
733: roff_endparse(curp->roff);
734: }
735:
736: static void
1.36 schwarze 737: mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
1.28 joerg 738: {
1.85 schwarze 739: struct buf *svprimary;
1.28 joerg 740: const char *svfile;
1.36 schwarze 741: static int recursion_depth;
742:
743: if (64 < recursion_depth) {
744: mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
745: return;
746: }
1.28 joerg 747:
748: /* Line number is per-file. */
749: svfile = curp->file;
750: curp->file = file;
1.85 schwarze 751: svprimary = curp->primary;
1.83 schwarze 752: curp->primary = &blk;
1.28 joerg 753: curp->line = 1;
1.36 schwarze 754: recursion_depth++;
1.28 joerg 755:
1.93 ! schwarze 756: /* Skip an UTF-8 byte order mark. */
! 757: if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
! 758: (unsigned char)blk.buf[0] == 0xef &&
! 759: (unsigned char)blk.buf[1] == 0xbb &&
! 760: (unsigned char)blk.buf[2] == 0xbf) {
! 761: blk.offs = 3;
! 762: curp->filenc &= ~MPARSE_LATIN1;
! 763: }
! 764:
1.28 joerg 765: mparse_buf_r(curp, blk, 1);
766:
1.36 schwarze 767: if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status)
1.28 joerg 768: mparse_end(curp);
769:
1.85 schwarze 770: curp->primary = svprimary;
1.28 joerg 771: curp->file = svfile;
772: }
773:
774: enum mandoclevel
775: mparse_readmem(struct mparse *curp, const void *buf, size_t len,
776: const char *file)
777: {
778: struct buf blk;
779:
780: blk.buf = UNCONST(buf);
781: blk.sz = len;
1.93 ! schwarze 782: blk.offs = 0;
1.28 joerg 783:
1.36 schwarze 784: mparse_parse_buffer(curp, blk, file);
1.28 joerg 785: return(curp->file_status);
786: }
787:
1.36 schwarze 788: enum mandoclevel
789: mparse_readfd(struct mparse *curp, int fd, const char *file)
1.1 kristaps 790: {
1.28 joerg 791: struct buf blk;
792: int with_mmap;
1.93 ! schwarze 793: int save_filenc;
1.1 kristaps 794:
1.40 schwarze 795: if (-1 == fd && -1 == (fd = open(file, O_RDONLY, 0))) {
796: curp->file_status = MANDOCLEVEL_SYSERR;
797: if (curp->mmsg)
798: (*curp->mmsg)(MANDOCERR_SYSOPEN,
799: curp->file_status,
800: file, 0, 0, strerror(errno));
1.91 schwarze 801: return(curp->file_status);
1.40 schwarze 802: }
803:
1.28 joerg 804: /*
805: * Run for each opened file; may be called more than once for
806: * each full parse sequence if the opened file is nested (i.e.,
807: * from `so'). Simply sucks in the whole file and moves into
808: * the parse phase for the file.
809: */
1.1 kristaps 810:
1.91 schwarze 811: if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
1.93 ! schwarze 812: save_filenc = curp->filenc;
! 813: curp->filenc = curp->options &
! 814: (MPARSE_UTF8 | MPARSE_LATIN1);
1.91 schwarze 815: mparse_parse_buffer(curp, blk, file);
1.93 ! schwarze 816: curp->filenc = save_filenc;
1.81 schwarze 817: #if HAVE_MMAP
1.91 schwarze 818: if (with_mmap)
819: munmap(blk.buf, blk.sz);
820: else
1.28 joerg 821: #endif
1.91 schwarze 822: free(blk.buf);
823: }
1.1 kristaps 824:
825: if (STDIN_FILENO != fd && -1 == close(fd))
826: perror(file);
1.91 schwarze 827:
1.1 kristaps 828: return(curp->file_status);
1.82 schwarze 829: }
830:
831: enum mandoclevel
832: mparse_open(struct mparse *curp, int *fd, const char *file,
833: pid_t *child_pid)
834: {
835: int pfd[2];
836: char *cp;
837: enum mandocerr err;
838:
839: pfd[1] = -1;
840: curp->file = file;
841: if ((cp = strrchr(file, '.')) == NULL ||
842: strcmp(cp + 1, "gz")) {
843: *child_pid = 0;
844: if ((*fd = open(file, O_RDONLY)) == -1) {
845: err = MANDOCERR_SYSOPEN;
846: goto out;
847: }
848: return(MANDOCLEVEL_OK);
849: }
850:
851: if (pipe(pfd) == -1) {
852: err = MANDOCERR_SYSPIPE;
853: goto out;
854: }
855:
856: switch (*child_pid = fork()) {
857: case -1:
858: err = MANDOCERR_SYSFORK;
859: close(pfd[0]);
860: close(pfd[1]);
861: pfd[1] = -1;
862: break;
863: case 0:
864: close(pfd[0]);
865: if (dup2(pfd[1], STDOUT_FILENO) == -1) {
866: err = MANDOCERR_SYSDUP;
867: break;
868: }
869: execlp("gunzip", "gunzip", "-c", file, NULL);
870: err = MANDOCERR_SYSEXEC;
871: break;
872: default:
873: close(pfd[1]);
874: *fd = pfd[0];
875: return(MANDOCLEVEL_OK);
876: }
877:
878: out:
879: *fd = -1;
880: *child_pid = 0;
881: curp->file_status = MANDOCLEVEL_SYSERR;
882: if (curp->mmsg)
883: (*curp->mmsg)(err, curp->file_status, file,
884: 0, 0, strerror(errno));
885: if (pfd[1] != -1)
886: exit(1);
887: return(curp->file_status);
888: }
889:
890: enum mandoclevel
891: mparse_wait(struct mparse *curp, pid_t child_pid)
892: {
893: int status;
894:
895: if (waitpid(child_pid, &status, 0) == -1) {
896: mandoc_msg(MANDOCERR_SYSWAIT, curp, 0, 0,
897: strerror(errno));
898: curp->file_status = MANDOCLEVEL_SYSERR;
899: return(curp->file_status);
900: }
901: if (WIFSIGNALED(status)) {
902: mandoc_vmsg(MANDOCERR_SYSSIG, curp, 0, 0,
903: "%d", WTERMSIG(status));
904: curp->file_status = MANDOCLEVEL_SYSERR;
905: return(curp->file_status);
906: }
907: if (WEXITSTATUS(status)) {
908: mandoc_vmsg(MANDOCERR_SYSEXIT, curp, 0, 0,
909: "%d", WEXITSTATUS(status));
910: curp->file_status = MANDOCLEVEL_SYSERR;
911: return(curp->file_status);
912: }
913: return(MANDOCLEVEL_OK);
1.1 kristaps 914: }
915:
916: struct mparse *
1.44 schwarze 917: mparse_alloc(int options, enum mandoclevel wlevel,
1.69 schwarze 918: mandocmsg mmsg, const char *defos)
1.1 kristaps 919: {
920: struct mparse *curp;
1.10 kristaps 921:
922: assert(wlevel <= MANDOCLEVEL_FATAL);
1.1 kristaps 923:
924: curp = mandoc_calloc(1, sizeof(struct mparse));
925:
1.44 schwarze 926: curp->options = options;
1.3 kristaps 927: curp->wlevel = wlevel;
1.1 kristaps 928: curp->mmsg = mmsg;
1.29 schwarze 929: curp->defos = defos;
1.1 kristaps 930:
1.44 schwarze 931: curp->roff = roff_alloc(curp, options);
1.72 schwarze 932: if (curp->options & MPARSE_MDOC)
933: curp->pmdoc = mdoc_alloc(
934: curp->roff, curp, curp->defos,
935: curp->options & MPARSE_QUICK ? 1 : 0);
936: if (curp->options & MPARSE_MAN)
937: curp->pman = man_alloc(curp->roff, curp,
938: curp->options & MPARSE_QUICK ? 1 : 0);
939:
1.1 kristaps 940: return(curp);
941: }
942:
943: void
944: mparse_reset(struct mparse *curp)
945: {
946:
947: roff_reset(curp->roff);
948:
949: if (curp->mdoc)
950: mdoc_reset(curp->mdoc);
951: if (curp->man)
952: man_reset(curp->man);
1.24 kristaps 953: if (curp->secondary)
954: curp->secondary->sz = 0;
1.1 kristaps 955:
956: curp->file_status = MANDOCLEVEL_OK;
957: curp->mdoc = NULL;
958: curp->man = NULL;
1.45 schwarze 959:
960: free(curp->sodest);
961: curp->sodest = NULL;
1.1 kristaps 962: }
963:
964: void
965: mparse_free(struct mparse *curp)
966: {
967:
968: if (curp->pmdoc)
969: mdoc_free(curp->pmdoc);
970: if (curp->pman)
971: man_free(curp->pman);
972: if (curp->roff)
973: roff_free(curp->roff);
1.24 kristaps 974: if (curp->secondary)
975: free(curp->secondary->buf);
1.1 kristaps 976:
1.24 kristaps 977: free(curp->secondary);
1.45 schwarze 978: free(curp->sodest);
1.1 kristaps 979: free(curp);
980: }
981:
982: void
1.45 schwarze 983: mparse_result(struct mparse *curp,
984: struct mdoc **mdoc, struct man **man, char **sodest)
1.1 kristaps 985: {
986:
1.45 schwarze 987: if (sodest && NULL != (*sodest = curp->sodest)) {
988: *mdoc = NULL;
989: *man = NULL;
990: return;
991: }
1.9 kristaps 992: if (mdoc)
993: *mdoc = curp->mdoc;
994: if (man)
995: *man = curp->man;
1.3 kristaps 996: }
997:
998: void
999: mandoc_vmsg(enum mandocerr t, struct mparse *m,
1000: int ln, int pos, const char *fmt, ...)
1001: {
1002: char buf[256];
1003: va_list ap;
1004:
1005: va_start(ap, fmt);
1.48 schwarze 1006: (void)vsnprintf(buf, sizeof(buf), fmt, ap);
1.3 kristaps 1007: va_end(ap);
1008:
1009: mandoc_msg(t, m, ln, pos, buf);
1010: }
1011:
1012: void
1.47 schwarze 1013: mandoc_msg(enum mandocerr er, struct mparse *m,
1.3 kristaps 1014: int ln, int col, const char *msg)
1015: {
1016: enum mandoclevel level;
1017:
1018: level = MANDOCLEVEL_FATAL;
1019: while (er < mandoclimits[level])
1020: level--;
1021:
1022: if (level < m->wlevel)
1023: return;
1024:
1.8 kristaps 1025: if (m->mmsg)
1026: (*m->mmsg)(er, level, m->file, ln, col, msg);
1.3 kristaps 1027:
1028: if (m->file_status < level)
1029: m->file_status = level;
1.7 kristaps 1030: }
1031:
1032: const char *
1033: mparse_strerror(enum mandocerr er)
1034: {
1035:
1036: return(mandocerrs[er]);
1037: }
1038:
1039: const char *
1040: mparse_strlevel(enum mandoclevel lvl)
1041: {
1042: return(mandoclevels[lvl]);
1.24 kristaps 1043: }
1044:
1045: void
1046: mparse_keep(struct mparse *p)
1047: {
1048:
1049: assert(NULL == p->secondary);
1050: p->secondary = mandoc_calloc(1, sizeof(struct buf));
1051: }
1052:
1053: const char *
1054: mparse_getkeep(const struct mparse *p)
1055: {
1056:
1057: assert(p->secondary);
1058: return(p->secondary->sz ? p->secondary->buf : NULL);
1.1 kristaps 1059: }
CVSweb