===================================================================
RCS file: /cvs/mandoc/main.c,v
retrieving revision 1.118
retrieving revision 1.143
diff -u -p -r1.118 -r1.143
--- mandoc/main.c	2010/12/05 15:55:01	1.118
+++ mandoc/main.c	2011/02/06 20:36:36	1.143
@@ -1,7 +1,7 @@
-/*	$Id: main.c,v 1.118 2010/12/05 15:55:01 kristaps Exp $ */
+/*	$Id: main.c,v 1.143 2011/02/06 20:36:36 kristaps Exp $ */
 /*
- * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -41,6 +41,7 @@
 #define	MAP_FILE	0
 #endif
 
+#define	REPARSE_LIMIT	1000
 #define	UNCONST(a)	((void *)(uintptr_t)(const void *)(a))
 
 /* FIXME: Intel's compiler?  LLVM?  pcc?  */
@@ -89,6 +90,7 @@ struct	curparse {
 	struct mdoc	 *mdoc;		/* mdoc parser */
 	struct roff	 *roff;		/* roff parser (!NULL) */
 	struct regset	  regs;		/* roff registers */
+	int		  reparse_count; /* finite interpolation stack */
 	enum outt	  outtype; 	/* which output to use */
 	out_mdoc	  outmdoc;	/* mdoc output ptr */
 	out_man	  	  outman;	/* man output ptr */
@@ -122,71 +124,97 @@ static	const char * const	mandocerrs[MANDOCERR_MAX] = 
 
 	"generic warning",
 
+	/* related to the prologue */
+	"no title in document",
+	"document title should be all caps",
+	"unknown manual section",
+	"cannot parse date argument",
+	"prologue macros out of order",
+	"duplicate prologue macro",
+	"macro not allowed in prologue",
+	"macro not allowed in body",
+
+	/* related to document structure */
 	".so is fragile, better use ln(1)",
-	"text should be uppercase",
+	"NAME section must come first",
+	"bad NAME section contents",
+	"manual name not yet set",
 	"sections out of conventional order",
-	"section name repeats",
-	"out of order prologue",
-	"repeated prologue entry",
-	"list type must come first",
-	"tab in non-literal context",
-	"bad escape sequence",
-	"unterminated quoted string",
-	"argument requires the width argument",
-	"bad date argument",
-	"bad width argument",
-	"unknown manual section",
+	"duplicate section name",
 	"section not in conventional manual section",
-	"end of line whitespace",
+
+	/* related to macros and nesting */
+	"skipping obsolete macro",
+	"skipping paragraph macro",
+	"skipping no-space macro",
 	"blocks badly nested",
+	"child violates parent syntax",
+	"nested displays are not portable",
+	"already in literal mode",
 
-	"generic error",
+	/* related to missing macro arguments */
+	"skipping empty macro",
+	"argument count wrong",
+	"missing display type",
+	"list type must come first",
+	"tag lists require a width argument",
+	"missing font type",
+	"skipping end of block that is not open",
 
-	"NAME section must come first",
+	/* related to bad macro arguments */
+	"skipping argument",
+	"duplicate argument",
+	"duplicate display type",
+	"duplicate list type",
+	"unknown AT&T UNIX version",
 	"bad Boolean value",
-	"child violates parent syntax",
-	"displays may not be nested",
-	"bad AT&T symbol",
-	"bad standard",
-	"list type repeated",
-	"display type repeated",
-	"argument repeated",
-	"ignoring argument",
-	"manual name not yet set",
-	"obsolete macro ignored",
-	"empty macro ignored",
-	"macro not allowed in body",
-	"macro not allowed in prologue",
-	"bad character",
-	"bad NAME section contents",
-	"no blank lines",
-	"no text in this context",
+	"unknown font",
+	"unknown standard specifier",
+	"bad width argument",
+
+	/* related to plain text */
+	"blank line in non-literal context",
+	"tab in non-literal context",
+	"end of line whitespace",
 	"bad comment style",
-	"unknown macro will be lost",
-	"NOT IMPLEMENTED: skipping request",
+	"unknown escape sequence",
+	"unterminated quoted string",
+	
+	"generic error",
+
+	/* related to tables */
+	"bad table syntax",
+	"bad table option",
+	"bad table layout",
+	"no table layout cells specified",
+	"no table data cells specified",
+	"ignore data in cell",
+	"data block still open",
+	"ignoring extra data cells",
+
+	"input stack limit exceeded, infinite loop?",
+	"skipping bad character",
+	"escaped character not allowed in a name",
+	"skipping text before the first section header",
+	"skipping unknown macro",
+	"NOT IMPLEMENTED, please use groff: skipping request",
 	"line scope broken",
 	"argument count wrong",
-	"request scope close w/none open",
-	"scope already open",
+	"skipping end of block that is not open",
+	"missing end of block",
 	"scope open on exit",
 	"uname(3) system call failed",
 	"macro requires line argument(s)",
 	"macro requires body argument(s)",
 	"macro requires argument(s)",
-	"no title in document",
 	"missing list type",
-	"missing display type",
-	"missing font type",
 	"line argument(s) will be lost",
 	"body argument(s) will be lost",
-	"paragraph macro ignored",
 
 	"generic fatal error",
 
 	"column syntax is inconsistent",
-	"unsupported display type",
-	"blocks badly nested",
-	"no such block is open",
+	"NOT IMPLEMENTED: .Bd -file",
 	"line scope broken, syntax violated",
 	"argument count wrong, violates syntax",
 	"child violates parent syntax",
@@ -375,7 +403,7 @@ static void
 resize_buf(struct buf *buf, size_t initial)
 {
 
-	buf->sz = buf->sz ? 2 * buf->sz : initial;
+	buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
 	buf->buf = realloc(buf->buf, buf->sz);
 	if (NULL == buf->buf) {
 		perror(NULL);
@@ -508,10 +536,7 @@ fdesc(struct curparse *curp)
 	}
 
 	assert(curp->roff);
-	if ( ! roff_endparse(curp->roff)) {
-		assert(MANDOCLEVEL_FATAL <= file_status);
-		goto cleanup;
-	}
+	roff_endparse(curp->roff);
 
 	/*
 	 * With -Wstop and warnings or errors of at least
@@ -632,6 +657,7 @@ pdesc(struct curparse *curp)
 static void
 parsebuf(struct curparse *curp, struct buf blk, int start)
 {
+	const struct tbl_span	*span;
 	struct buf	 ln;
 	enum rofferr	 rr;
 	int		 i, of, rc;
@@ -654,10 +680,22 @@ parsebuf(struct curparse *curp, struct buf blk, int st
 		if (0 == pos && '\0' == blk.buf[i])
 			break;
 
-		if (start)
+		if (start) {
 			curp->line = lnn;
+			curp->reparse_count = 0;
+		}
 
 		while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {
+
+			/*
+			 * When finding an unescaped newline character,
+			 * leave the character loop to process the line.
+			 * Skip a preceding carriage return, if any.
+			 */
+
+			if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz &&
+			    '\n' == blk.buf[i + 1])
+				++i;
 			if ('\n' == blk.buf[i]) {
 				++i;
 				++lnn;
@@ -692,11 +730,18 @@ parsebuf(struct curparse *curp, struct buf blk, int st
 				continue;
 			}
 
-			/* Found escape & at least one other char. */
+			/*
+			 * Found escape and at least one other character.
+			 * When it's a newline character, skip it.
+			 * When there is a carriage return in between,
+			 * skip that one as well.
+			 */
 
+			if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz &&
+			    '\n' == blk.buf[i + 2])
+				++i;
 			if ('\n' == blk.buf[i + 1]) {
 				i += 2;
-				/* Escaped newlines are skipped over */
 				++lnn;
 				continue;
 			}
@@ -754,7 +799,11 @@ rerun:
 
 		switch (rr) {
 		case (ROFF_REPARSE):
-			parsebuf(curp, ln, 0);
+			if (REPARSE_LIMIT >= ++curp->reparse_count)
+				parsebuf(curp, ln, 0);
+			else
+				mmsg(MANDOCERR_ROFFLOOP, curp, 
+				    curp->line, pos, NULL);
 			pos = 0;
 			continue;
 		case (ROFF_APPEND):
@@ -774,11 +823,19 @@ rerun:
 				continue;
 			} else
 				break;
-		case (ROFF_CONT):
+		default:
 			break;
 		}
 
 		/*
+		 * If we encounter errors in the recursive parsebuf()
+		 * call, make sure we don't continue parsing.
+		 */
+
+		if (MANDOCLEVEL_FATAL <= file_status)
+			break;
+
+		/*
 		 * If input parsers have not been allocated, do so now.
 		 * We keep these instanced betwen parsers, but set them
 		 * locally per parse routine since we can use different
@@ -792,9 +849,23 @@ rerun:
 		 * Lastly, push down into the parsers themselves.  One
 		 * of these will have already been set in the pset()
 		 * routine.
+		 * If libroff returns ROFF_TBL, then add it to the
+		 * currently open parse.  Since we only get here if
+		 * there does exist data (see tbl_data.c), we're
+		 * guaranteed that something's been allocated.
 		 */
 
-		if (curp->man || curp->mdoc) {
+		if (ROFF_TBL == rr) {
+			assert(curp->man || curp->mdoc);
+			while (NULL != (span = roff_span(curp->roff))) {
+				if (curp->man)
+					man_addspan(curp->man, span);
+				else
+					mdoc_addspan(curp->mdoc, span);
+			}
+		} else if (ROFF_EQN == rr) {
+			assert(curp->man || curp->mdoc);
+		} else if (curp->man || curp->mdoc) {
 			rc = curp->man ?
 				man_parseln(curp->man, 
 					curp->line, ln.buf, of) :