/*	$Id: pod2mdoc.c,v 1.34 2014/07/19 00:42:22 schwarze Exp $ */
/*
 * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include <sys/stat.h>
#include <sys/time.h>

#include <assert.h>
#include <ctype.h>
#include <fcntl.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * In what section can we find Perl module manuals?
 * Sometimes (Mac OS X) it's 3pm, sometimes (OpenBSD, etc.) 3p.
 * XXX IF YOU CHANGE THIS, CHANGE POD2MDOC.1 AS WELL.
 */
#define	PERL_SECTION	"3p"

/*
 * Command-line overrides for the mdoc(7) prologue.
 * A NULL member means "no override"; the value is then derived
 * when the prologue is printed.
 */
struct	args {
	const char	*title; /* override "Dt" title */
	const char	*date; /* override "Dd" date */
	const char	*section; /* override "Dt" section */
};

enum	list {
	LIST_BULLET = 0,
	LIST_ENUM,
	LIST_TAG,
	LIST__MAX
};

/*
 * Sections that get special treatment; set by "=head1".
 */
enum	sect {
	SECT_NONE = 0, /* any other section */
	SECT_NAME, /* NAME section */
	SECT_SYNOPSIS, /* SYNOPSIS section */
};

/*
 * What kind of line the mdoc(7) output stream is currently on.
 */
enum	outstate {
	OUST_NL = 0,	/* just started a new output line */
	OUST_TXT,	/* text line output in progress */
	OUST_MAC	/* macro line output in progress */
};

/*
 * Parser and output state carried through one input file.
 */
struct	state {
	const char	*fname; /* file being parsed */
	int		 parsing; /* zero before the first command and after =cut */
	int		 paused; /* in =begin and before =end */
	enum sect	 sect; /* which section are we in? */
#define	LIST_STACKSZ	 128
	enum list	 lstack[LIST_STACKSZ]; /* open lists */
	size_t		 lpos; /* number of open lists; innermost is lstack[lpos - 1] */
	int		 haspar; /* nonzero: the next ordinary paragraph prints no Pp */
	enum outstate	 oust; /* state of the mdoc output stream */
	int		 wantws; /* let mdoc(7) output whitespace here */
	char		*outbuf; /* text buffered for output */
	size_t		 outbufsz; /* allocated size of outbuf */
	size_t		 outbuflen; /* current length of outbuf */
};

enum	fmt {
	FMT_ITALIC,
	FMT_BOLD,
	FMT_CODE,
	FMT_LINK,
	FMT_ESCAPE,
	FMT_FILE,
	FMT_NBSP,
	FMT_INDEX,
	FMT_NULL,
	FMT__MAX
};

enum	cmd {
	CMD_POD = 0,
	CMD_HEAD1,
	CMD_HEAD2,
	CMD_HEAD3,
	CMD_HEAD4,
	CMD_OVER,
	CMD_ITEM,
	CMD_BACK,
	CMD_BEGIN,
	CMD_END,
	CMD_FOR,
	CMD_ENCODING,
	CMD_CUT,
	CMD__MAX
};

static	const char *const cmds[CMD__MAX] = {
	"pod", 		/* CMD_POD */
	"head1",	/* CMD_HEAD1 */
	"head2",	/* CMD_HEAD2 */
	"head3",	/* CMD_HEAD3 */
	"head4",	/* CMD_HEAD4 */
	"over",		/* CMD_OVER */
	"item",		/* CMD_ITEM */
	"back",		/* CMD_BACK */
	"begin",	/* CMD_BEGIN */
	"end",		/* CMD_END */
	"for",		/* CMD_FOR */
	"encoding",	/* CMD_ENCODING */
	"cut"		/* CMD_CUT */
};

static	const char fmts[FMT__MAX] = {
	'I',		/* FMT_ITALIC */
	'B',		/* FMT_BOLD */
	'C',		/* FMT_CODE */
	'L',		/* FMT_LINK */
	'E',		/* FMT_ESCAPE */
	'F',		/* FMT_FILE */
	'S',		/* FMT_NBSP */
	'X',		/* FMT_INDEX */
	'Z'		/* FMT_NULL */
};

/*
 * Most recent character handled by the input/output routines.
 * outbuf_addchar() appends this character to the output buffer.
 */
static	int 	last;


/*
 * Enlarge the output buffer so that at least "by" more bytes fit.
 * The buffer grows in multiples of 128 bytes.
 * Prints an error and exits if memory cannot be obtained.
 */
static void
outbuf_grow(struct state *st, size_t by)
{
	size_t	 newsz;
	char	*grown;

	newsz = st->outbufsz + (by / 128 + 1) * 128;
	grown = realloc(st->outbuf, newsz);
	if (NULL == grown) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}
	st->outbuf = grown;
	st->outbufsz = newsz;
}

static void
outbuf_addchar(struct state *st)
{

	if (st->outbuflen + 2 >= st->outbufsz)
		outbuf_grow(st, 1);
	st->outbuf[st->outbuflen++] = last;
	if ('\\' == last)
		st->outbuf[st->outbuflen++] = 'e';
	st->outbuf[st->outbuflen] = '\0';
	st->wantws = 0;
}

/*
 * Append the NUL-terminated string "str" verbatim to the output
 * buffer (no escaping is done) and remember its final character
 * in "last".
 * An empty string leaves "last" untouched.
 * Clears wantws because text is now pending.
 */
static void
outbuf_addstr(struct state *st, const char *str)
{
	size_t	 slen;

	slen = strlen(str);
	if (st->outbuflen + slen >= st->outbufsz)
		outbuf_grow(st, slen);
	/* Copy the terminating NUL along with the text. */
	memcpy(st->outbuf + st->outbuflen, str, slen+1);
	st->outbuflen += slen;
	/* Guard against indexing str[-1] for an empty string. */
	if (slen > 0)
		last = str[slen - 1];
	st->wantws = 0;
}

/*
 * Write any buffered text to standard output and empty the buffer.
 * If we were at the start of an output line, we are on a text
 * line afterwards.
 * Does nothing when the buffer is empty.
 */
static void
outbuf_flush(struct state *st)
{

	if (st->outbuflen > 0) {
		fputs(st->outbuf, stdout);
		st->outbuf[0] = '\0';
		st->outbuflen = 0;

		if (OUST_NL == st->oust)
			st->oust = OUST_TXT;
	}
}

/*
 * End the current output line unless we are already at the start
 * of a fresh one, and go back to OUST_NL/wantws mode.
 */
static void
mdoc_newln(struct state *st)
{

	if (OUST_NL != st->oust) {
		putchar('\n');
		st->oust = OUST_NL;
		st->wantws = 1;
		last = '\n';
	}
}

/*
 * Given buf[*start] is at the start of an escape name, read til the end
 * of the escape ('>') then try to do something with it.
 * Sets start to be one after the '>', also when the name is too
 * long to be recognised; if no '>' exists before "end", start is
 * left at "end".
 *
 * This function does not care about output modes,
 * it merely appends text to the output buffer,
 * which can then be used in any mode.
 */
static void
formatescape(struct state *st, const char *buf, size_t *start, size_t end)
{
	char		 esc[16]; /* no more needed */
	size_t		 i, max;

	max = sizeof(esc) - 1;
	i = 0;
	/* Read til our buffer is full. */
	while (*start < end && '>' != buf[*start] && i < max)
		esc[i++] = buf[(*start)++];
	esc[i] = '\0';

	if (i == max) {
		/* Too long... skip til we end. */
		while (*start < end && '>' != buf[*start])
			(*start)++;
		/*
		 * Swallow the closing '>' as well, lest the caller
		 * print it as ordinary text.
		 */
		if (*start < end)
			(*start)++;
		return;
	} else if (*start >= end)
		return;

	assert('>' == buf[*start]);
	(*start)++;

	/*
	 * TODO: right now, we only recognise the named escapes.
	 * Just let the rest of them go.
	 */
	if (0 == strcmp(esc, "lt"))
		outbuf_addstr(st, "\\(la");
	else if (0 == strcmp(esc, "gt"))
		outbuf_addstr(st, "\\(ra");
	else if (0 == strcmp(esc, "verbar"))
		outbuf_addstr(st, "\\(ba");
	else if (0 == strcmp(esc, "sol"))
		outbuf_addstr(st, "\\(sl");
}

/*
 * Does the "len" byte long text at "s" begin with the
 * NUL-terminated prefix "pfx"?
 * Never reads more than strlen(pfx) bytes from either argument.
 */
static int
trylink_prefix(const char *s, size_t len, const char *pfx)
{
	size_t	 plen;

	plen = strlen(pfx);
	return(len >= plen && 0 == memcmp(s, pfx, plen));
}

/*
 * Run some heuristics to intuit a link format.
 * "start" is at the first byte of the link content, "end" is the
 * end of the paragraph and "dsz" the number of opening '<'.
 * I set "start" to be the end of the sequence (first right-caret of
 * the terminator) so that the caller can safely just continue
 * processing.
 * If this is just an empty tag or no terminator is found, I'll
 * return 0 without printing anything; otherwise I print a macro
 * (Sx, Lk, Mt or Xr) with its argument and return 1.
 *
 * Always operates in OUST_MAC mode.
 * Mode handling is done by the caller.
 */
static int
trylink(const char *buf, size_t *start, size_t end, size_t dsz)
{
	size_t		 linkstart, realend, linkend,
			 i, j, textsz, stack;

	/*
	 * Scan to the start of the terminus.
	 * This function is more or less replicated in the formatcode()
	 * for null or index formatting codes.
	 * However, we're slightly different because we might have
	 * nested escapes we need to ignore.
	 */
	stack = 0;
	for (linkstart = realend = *start; realend < end; realend++) {
		if ('<' == buf[realend])
			stack++;
		if ('>' != buf[realend])
			continue;
		/*
		 * Close a nested code, if one is open.
		 * Only decrement while something is open: the counter
		 * is unsigned and must not wrap on a stray '>' such
		 * as the one in "a->b".
		 */
		if (stack > 0) {
			stack--;
			continue;
		}
		if (dsz == 1)
			break;
		assert(realend > 0);
		if (' ' != buf[realend - 1])
			continue;
		for (i = realend, j = 0; i < end && j < dsz; j++)
			if ('>' != buf[i++])
				break;
		if (dsz == j)
			break;
	}

	/* Ignore stubs. */
	if (realend == end || realend == *start)
		return(0);

	/* Set linkend to the end of content. */
	linkend = dsz > 1 ? realend - 1 : realend;

	/* Re-scan to see if we have a title or section. */
	for (textsz = *start; textsz < linkend; textsz++)
		if ('|' == buf[textsz] || '/' == buf[textsz])
			break;

	if (textsz < linkend && '|' == buf[textsz]) {
		/* With title: set start, then end at section. */
		linkstart = textsz + 1;
		textsz = textsz - *start;
		for (i = linkstart; i < linkend; i++)
			if ('/' == buf[i])
				break;
		if (i < linkend)
			linkend = i;
	} else if (textsz < linkend && '/' == buf[textsz]) {
		/* With section: set end at section. */
		linkend = textsz;
		textsz = 0;
	} else
		/* No title, no section. */
		textsz = 0;

	*start = realend;
	j = linkend - linkstart;

	/* Do we have only subsection material? */
	if (0 == j && '/' == buf[linkend]) {
		linkstart = linkend + 1;
		linkend = dsz > 1 ? realend - 1 : realend;
		if (0 == (j = linkend - linkstart))
			return(0);
		printf("Sx %.*s", (int)j, &buf[linkstart]);
		return(1);
	} else if (0 == j)
		return(0);

	/*
	 * See if we qualify as being a link or not.
	 * For "scheme://..." the name was cut at the first slash
	 * above, so print up to the real end of the content.
	 */
	if (trylink_prefix(&buf[linkstart], j, "http:") ||
	    trylink_prefix(&buf[linkstart], j, "https:") ||
	    trylink_prefix(&buf[linkstart], j, "ftp:") ||
	    trylink_prefix(&buf[linkstart], j, "sftp:") ||
	    trylink_prefix(&buf[linkstart], j, "smb:") ||
	    trylink_prefix(&buf[linkstart], j, "afs:")) {
		/* Gross. */
		printf("Lk %.*s", (int)((dsz > 1 ? realend - 1 :
			realend) - linkstart), &buf[linkstart]);
		return(1);
	}

	/* See if we qualify as a mailto. */
	if (trylink_prefix(&buf[linkstart], j, "mailto:")) {
		printf("Mt %.*s", (int)j, &buf[linkstart]);
		return(1);
	}

	/* See if we're a foo(5), foo(5x), or foo(5xx) manpage. */
	if ((j > 3 && ')' == buf[linkend - 1]) &&
		('(' == buf[linkend - 3])) {
		printf("Xr %.*s %c", (int)(j - 3),
			&buf[linkstart], buf[linkend - 2]);
		return(1);
	} else if ((j > 4 && ')' == buf[linkend - 1]) &&
		('(' == buf[linkend - 4])) {
		printf("Xr %.*s %.*s", (int)(j - 4),
			&buf[linkstart], 2, &buf[linkend - 3]);
		return(1);
	} else if ((j > 5 && ')' == buf[linkend - 1]) &&
		('(' == buf[linkend - 5])) {
		printf("Xr %.*s %.*s", (int)(j - 5),
			&buf[linkstart], 3, &buf[linkend - 4]);
		return(1);
	}

	/* Last try: do we have a double-colon? */
	for (i = linkstart + 1; i < linkend; i++)
		if (':' == buf[i] && ':' == buf[i - 1])
			break;

	/* A double-colon name goes to the Perl section, else section 1. */
	if (i < linkend)
		printf("Xr %.*s " PERL_SECTION,
			(int)j, &buf[linkstart]);
	else
		printf("Xr %.*s 1", (int)j, &buf[linkstart]);

	return(1);
}

/*
 * Doclifting: if we're a bold "-xx" and we're in the SYNOPSIS section,
 * then it's likely that we're a flag.
 * Our flag might be followed by an argument, so make sure that we're
 * accounting for that, too.
 * If we don't have a flag at all, however, then assume we're an "Ar".
 *
 * On entry, buf[*start] must be the '-' and at least one more byte
 * must precede "end".
 * Prints "Fl name " for each flag found and "Ar " when something
 * else follows; "start" is advanced past whatever was printed.
 * Input that stops before the flag is terminated is treated as
 * not being a flag rather than aborting.
 *
 * Always operates in OUST_MAC mode.
 * Mode handling is done by the caller.
 */
static void
dosynopsisfl(const char *buf, size_t *start, size_t end)
{
	size_t		 i;
	unsigned char	 c;

	assert(*start + 1 < end);
	assert('-' == buf[*start]);

	for (;;) {
		/*
		 * The <ctype.h> functions need unsigned char values,
		 * or bytes >= 0x80 invoke undefined behaviour.
		 */
		c = (unsigned char)buf[*start + 1];
		if ( ! isalnum(c) && '?' != c && '-' != c) {
			(*start)--;
			fputs("Ar ", stdout);
			return;
		}

		/* Find the end of the flag name. */
		(*start)++;
		for (i = *start; i < end; i++) {
			c = (unsigned char)buf[i];
			if ( ! isalnum(c) && '?' != c &&
			    '-' != c && '_' != c)
				break;
		}

		/* Not properly terminated: not a flag after all. */
		if (i >= end || (' ' != buf[i] && '>' != buf[i])) {
			printf("Ar ");
			return;
		}

		printf("Fl ");
		/* Escape flag names that look like two-letter macros. */
		if (end - *start > 1 &&
		    isupper((unsigned char)buf[*start]) &&
		    islower((unsigned char)buf[*start + 1]) &&
		    (end - *start == 2 ||
		     ' ' == buf[*start + 2]))
			printf("\\&");
		printf("%.*s ", (int)(i - *start), &buf[*start]);
		*start = i;

		if (' ' != buf[i])
			return;

		while (i < end && ' ' == buf[i])
			i++;
		*start = i;

		/* Nothing but blanks up to the end of input. */
		if (i >= end)
			return;

		/* Anything but another complete flag is an argument. */
		if ('-' != buf[i] || i + 1 >= end) {
			printf("Ar ");
			return;
		}
	}
}

/*
 * We're at the character in front of a format code, which is structured
 * like X<...> and can contain nested format codes.
 * This consumes the whole format code, and any nested format codes, til
 * the end of matched production.
 * If "nomacro", then we don't print any macros, just contained data
 * (e.g., following "Sh" or "Nm").
 * "pos" is only significant in SYNOPSIS, and should be 0 when invoked
 * as the first format code on a line (for decoration as an "Nm"),
 * non-zero otherwise.
 *
 * Returns 1 if the content of a recognised code was processed,
 * 0 for escapes, ignored codes (null and index) and unknown letters.
 *
 * Output mode handling is most complicated here.
 * We may enter in any mode.
 * We usually exit in OUST_MAC mode, except when
 * entering without OUST_MAC and the code is invalid.
 */
static int
formatcode(struct state *st, const char *buf, size_t *start,
	size_t end, int nomacro, int pos)
{
	enum fmt	 fmt;
	size_t		 i, j, dsz;

	assert(*start + 1 < end);
	assert('<' == buf[*start + 1]);

	/*
	 * First, look up the format code.
	 * If it's not valid, treat it as a NOOP.
	 */
	for (fmt = 0; fmt < FMT__MAX; fmt++)
		if (buf[*start] == fmts[fmt])
			break;

	/*
	 * Determine whether we're overriding our delimiter.
	 * According to POD, if we have more than one '<' followed by a
	 * space, then we need a space followed by matching '>' to close
	 * the expression.
	 * Otherwise we use the usual '<' and '>' matched pair.
	 */
	i = *start + 1;
	while (i < end && '<' == buf[i])
		i++;
	assert(i > *start + 1);
	dsz = i - (*start + 1);
	if (dsz > 1 && (i >= end || ' ' != buf[i]))
		dsz = 1;

	/* Remember, if dsz>1, to jump the trailing space. */
	*start += dsz + 1 + (dsz > 1 ? 1 : 0);

	/*
	 * Escapes and ignored codes (NULL and INDEX) don't print macro
	 * sequences, so just output them like normal text before
	 * processing for real macros.
	 */
	if (FMT_ESCAPE == fmt) {
		formatescape(st, buf, start, end);
		return(0);
	} else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
		/*
		 * Just consume til the end delimiter, accounting for
		 * whether it's a custom one.
		 */
		for ( ; *start < end; (*start)++) {
			if ('>' != buf[*start])
				continue;
			else if (dsz == 1)
				break;
			assert(*start > 0);
			if (' ' != buf[*start - 1])
				continue;
			i = *start;
			for (j = 0; i < end && j < dsz; j++)
				if ('>' != buf[i++])
					break;
			if (dsz != j)
				continue;
			(*start) += dsz;
			break;
		}
		if (*start < end) {
			assert('>' == buf[*start]);
			(*start)++;
		}
		/*
		 * If whitespace came before the ignored code, drop
		 * the whitespace after it, too.
		 * The <ctype.h> functions need unsigned char values:
		 * both "last" and buf[] may hold bytes >= 0x80 that
		 * are negative when char is signed.
		 */
		if (isspace((unsigned char)last))
			while (*start < end &&
			    isspace((unsigned char)buf[*start]))
				(*start)++;
		return(0);
	}

	/*
	 * Check whether we're supposed to print macro stuff (this is
	 * suppressed in, e.g., "Nm" and "Sh" macros).
	 */
	if (FMT__MAX != fmt && !nomacro) {

		/*
		 * We may already have wantws if there was whitespace
		 * before the code ("text B<text"), but initial
		 * whitespace inside our scope ("textB< text")
		 * allows to break at this point as well.
		 */

		st->wantws |= ' ' == buf[*start];

		/*
		 * If we are on a text line and there is no
		 * whitespace before our content, we have to make
		 * the previous word a prefix to the macro line.
		 * In the following, mdoc_newln() must not be used
		 * lest we clobber out output state.
		 */

		if (OUST_MAC != st->oust && !st->wantws) {
			if (OUST_NL != st->oust)
				putchar('\n');
			printf(".Pf ");
		}

		outbuf_flush(st);

		/* Whitespace is easier to suppress on macro lines. */

		if (OUST_MAC == st->oust && !st->wantws)
			printf(" Ns ");

		/* Unless we are on a macro line, start one. */

		if (OUST_MAC != st->oust && st->wantws) {
			if (OUST_NL != st->oust)
				putchar('\n');
			putchar('.');
		} else
			putchar(' ');

		/*
		 * Print the macro corresponding to this format code,
		 * and update the output state afterwards.
		 */

		switch (fmt) {
		case (FMT_ITALIC):
			printf("Em ");
			break;
		case (FMT_BOLD):
			if (SECT_SYNOPSIS == st->sect) {
				if (1 == dsz && '-' == buf[*start])
					dosynopsisfl(buf, start, end);
				else if (0 == pos)
					printf("Nm ");
				else
					printf("Ar ");
				break;
			}
			/* A bold NULL is a defined variable. */
			if (0 == strncmp(buf + *start, "NULL", 4) &&
			    ('=' == buf[*start + 4] ||
			     '>' == buf[*start + 4]))
				printf("Dv ");
			else
				printf("Sy ");
			break;
		case (FMT_CODE):
			printf("Qo Li ");
			break;
		case (FMT_LINK):
			/* Try to link; use "No" if it's empty. */
			if ( ! trylink(buf, start, end, dsz))
				printf("No ");
			break;
		case (FMT_FILE):
			printf("Pa ");
			break;
		case (FMT_NBSP):
			printf("No ");
			break;
		default:
			abort();
		}
		st->oust = OUST_MAC;
		st->wantws = 1;
	} else
		outbuf_flush(st);

	/*
	 * Process until we reach the end marker (e.g., '>') or until we
	 * find a nested format code.
	 * Don't emit any newlines: since we're on a macro line, we
	 * don't want to break the line.
	 */
	while (*start < end) {
		if ('>' == buf[*start] && 1 == dsz) {
			(*start)++;
			break;
		} else if ('>' == buf[*start] &&
				' ' == buf[*start - 1]) {
			/*
			 * Handle custom delimiters.
			 * These require a certain number of
			 * space-preceded carets before we're really at
			 * the end.
			 */
			i = *start;
			for (j = 0; i < end && j < dsz; j++)
				if ('>' != buf[i++])
					break;
			if (dsz == j) {
				*start += dsz;
				break;
			}
		}
		/* A nested code: recurse; it is never first on the line. */
		if (*start + 1 < end && '<' == buf[*start + 1] &&
		    'A' <= buf[*start] && 'Z' >= buf[*start]) {
			formatcode(st, buf, start, end, nomacro, 1);
			continue;
		}

		/* Suppress newlines and multiple spaces. */

		last = buf[(*start)++];
		if (' ' == last || '\n' == last) {
			putchar(' ');
			while (*start < end && ' ' == buf[*start])
				(*start)++;
			continue;
		}

		if (OUST_MAC == st->oust && FMT__MAX != fmt) {
			if ( ! st->wantws) {
				printf(" Ns ");
				st->wantws = 1;
			}

			/*
			 * Escape macro-like words.
			 * This matches "Xx " and "XxEOLN".
			 */

			if (end - *start > 0 &&
			    isupper((unsigned char)last) &&
			    islower((unsigned char)buf[*start]) &&
			    (end - *start == 1 ||
			     ' ' == buf[*start + 1] ||
			     '>' == buf[*start + 1]))
				printf("\\&");
		}

		putchar(last);

		/* Protect against character escapes. */

		if ('\\' == last)
			putchar('e');
	}

	/* Unknown codes were printed as plain text only. */
	if (FMT__MAX == fmt)
		return(0);

	/* Close the quotation opened by "Qo Li". */
	if ( ! nomacro && FMT_CODE == fmt)
		printf(" Qc ");

	st->wantws = ' ' == last;
	return(1);
}

/*
 * Print the line macro "linemac" and then the text in
 * buf[*start..end) as its arguments, calling formatcode() for
 * every format code found, til the end of the given range.
 * "nomacro" is handed through to formatcode().
 * Must be entered in OUST_NL/wantws mode.
 * Goes to OUST_MAC mode and stays there when returning,
 * such that the caller can add arguments to the macro line
 * before closing it out.
 */
static void
formatcodeln(struct state *st, const char *linemac,
	const char *buf, size_t *start, size_t end, int nomacro)
{
	int	 gotmacro, wantws;

	assert(OUST_NL == st->oust);
	assert(st->wantws);
	printf(".%s ", linemac);
	st->oust = OUST_MAC;

	/* gotmacro: did the preceding formatcode() call return nonzero? */
	gotmacro = 0;
	while (*start < end)  {
		/* Collapse a blank or newline plus following blanks. */
		wantws = ' ' == buf[*start] || '\n' == buf[*start];
		if (wantws) {
			last = ' ';
			do {
				(*start)++;
			} while (*start < end && ' ' == buf[*start]);
		}

		/* A format code: let formatcode() consume all of it. */
		if (*start + 1 < end && '<' == buf[*start + 1] &&
		    'A' <= buf[*start] && 'Z' >= buf[*start]) {
			st->wantws |= wantws;
			gotmacro = formatcode(st, buf,
			    start, end, nomacro, 1);
			continue;
		}

		/*
		 * Plain text after a format code: go back to normal
		 * text with "No" if whitespace is wanted, or glue it
		 * on with "Ns" otherwise.
		 */
		if (gotmacro) {
			if (*start < end || st->outbuflen) {
				if (st->wantws ||
				    (wantws && !st->outbuflen))
					printf(" No ");
				else
					printf(" Ns ");
			}
			gotmacro = 0;
		}
		outbuf_flush(st);
		st->wantws = wantws;

		if (*start >= end)
			break;

		if (st->wantws) {
			putchar(' ');
			st->wantws = 0;
		}

		/*
		 * Since we're already on a macro line, we want to make
		 * sure that we don't inadvertently invoke a macro.
		 * We need to do this carefully because section names
		 * are used in troff and we don't want to escape
		 * something that needn't be escaped.
		 */
		if (' ' == last && end - *start > 1 &&
		    isupper((unsigned char)buf[*start]) &&
		    islower((unsigned char)buf[*start + 1]) &&
		    (end - *start == 2 || ' ' == buf[*start + 2]))
			printf("\\&");

		putchar(last = buf[*start]);

		/* Protect against character escapes. */

		if ('\\' == last)
			putchar('e');

		(*start)++;
	}
}

/*
 * Guess the list type from the text of its first =item,
 * which is buf[start..end).
 * As in the POD manual, "*" means a bullet list, "1" or "1."
 * a numbered list, and anything else a tagged list.
 */
static enum list
listguess(const char *buf, size_t start, size_t end)
{
	size_t		 len = end - start;
	const char	*item = buf + start;

	assert(end >= start);

	switch (len) {
	case (1):
		if ('*' == item[0])
			return(LIST_BULLET);
		if ('1' == item[0])
			return(LIST_ENUM);
		break;
	case (2):
		if ('1' == item[0] && '.' == item[1])
			return(LIST_ENUM);
		break;
	default:
		break;
	}

	return(LIST_TAG);
}

/*
 * A command paragraph, as noted in the perlpod manual, just indicates
 * that we should do something, optionally with some text to print as
 * well.
 * buf[start] must be the '=' introducing the command, "end" is the
 * end of the paragraph.
 * Unknown commands are ignored, and while paused by =begin,
 * everything but the matching =end is ignored, too.
 * From the perspective of external callers,
 * always stays in OUST_NL/wantws mode,
 * but its children do use OUST_MAC.
 */
static void
command(struct state *st, const char *buf, size_t start, size_t end)
{
	size_t		 len, csz;
	enum cmd	 cmd;

	assert('=' == buf[start]);
	start++;
	len = end - start;

	/* The command is recognised by prefix match on its name. */

	csz = 0;
	for (cmd = 0; cmd < CMD__MAX; cmd++) {
		csz = strlen(cmds[cmd]);
		if (len < csz)
			continue;
		if (0 == memcmp(&buf[start], cmds[cmd], csz))
			break;
	}

	/* Ignore bogus commands. */

	if (CMD__MAX == cmd)
		return;

	/* Skip to the command's text, if any. */

	start += csz;
	while (start < end && ' ' == buf[start])
		start++;

	len = end - start;

	if (st->paused) {
		st->paused = CMD_END != cmd;
		return;
	}

	switch (cmd) {
	case (CMD_POD):
		break;
	case (CMD_HEAD1):
		/*
		 * The behaviour of head= follows from a quick glance at
		 * how pod2man handles it.
		 */
		st->sect = SECT_NONE;
		if (4 == len && 0 == memcmp(&buf[start], "NAME", 4))
			st->sect = SECT_NAME;
		else if (8 == len &&
		    0 == memcmp(&buf[start], "SYNOPSIS", 8))
			st->sect = SECT_SYNOPSIS;
		formatcodeln(st, "Sh", buf, &start, end, 1);
		mdoc_newln(st);
		st->haspar = 1;
		break;
	case (CMD_HEAD2):
		formatcodeln(st, "Ss", buf, &start, end, 1);
		mdoc_newln(st);
		st->haspar = 1;
		break;
	case (CMD_HEAD3):
		puts(".Pp");
		formatcodeln(st, "Em", buf, &start, end, 0);
		mdoc_newln(st);
		puts(".Pp");
		st->haspar = 1;
		break;
	case (CMD_HEAD4):
		puts(".Pp");
		formatcodeln(st, "No", buf, &start, end, 0);
		mdoc_newln(st);
		puts(".Pp");
		st->haspar = 1;
		break;
	case (CMD_OVER):
		/*
		 * If we have an existing list that hasn't had an =item
		 * yet, then make sure that we open it now.
		 * We use the default list type, but that can't be
		 * helped (we haven't seen any items yet).
		 */
		if (st->lpos > 0)
			if (LIST__MAX == st->lstack[st->lpos - 1]) {
				st->lstack[st->lpos - 1] = LIST_TAG;
				puts(".Bl -tag -width Ds");
			}
		/*
		 * Refuse to nest beyond the list stack even when
		 * assertions are compiled out.
		 */
		if (st->lpos + 1 >= LIST_STACKSZ) {
			fputs("list nesting too deep\n", stderr);
			exit(EXIT_FAILURE);
		}
		st->lstack[st->lpos++] = LIST__MAX;
		break;
	case (CMD_ITEM):
		if (0 == st->lpos) {
			/*
			 * Bad markup.
			 * Try to compensate.
			 */
			st->lstack[st->lpos] = LIST__MAX;
			st->lpos++;
		}
		assert(st->lpos > 0);
		/*
		 * If we're the first =item, guess at what our content
		 * will be: "*" is a bullet list, "1." is a numbered
		 * list, and everything is tagged.
		 */
		if (LIST__MAX == st->lstack[st->lpos - 1]) {
			st->lstack[st->lpos - 1] =
				listguess(buf, start, end);
			switch (st->lstack[st->lpos - 1]) {
			case (LIST_BULLET):
				puts(".Bl -bullet");
				break;
			case (LIST_ENUM):
				puts(".Bl -enum");
				break;
			default:
				puts(".Bl -tag -width Ds");
				break;
			}
		}
		switch (st->lstack[st->lpos - 1]) {
		case (LIST_TAG):
			formatcodeln(st, "It", buf, &start, end, 0);
			mdoc_newln(st);
			break;
		case (LIST_ENUM):
			/* FALLTHROUGH */
		case (LIST_BULLET):
			/*
			 * Abandon the remainder of the paragraph
			 * because we're going to be a bulleted or
			 * numbered list.
			 */
			puts(".It");
			break;
		default:
			abort();
		}
		st->haspar = 1;
		break;
	case (CMD_BACK):
		/*
		 * Make sure we don't back over the stack.
		 * A list that never had an =item was never opened
		 * with "Bl", so it must not be closed with "El".
		 */
		if (st->lpos > 0) {
			st->lpos--;
			if (LIST__MAX != st->lstack[st->lpos])
				puts(".El");
		}
		break;
	case (CMD_BEGIN):
		/*
		 * We disregard all types for now.
		 * TODO: process at least "text" in a -literal block.
		 */
		st->paused = 1;
		break;
	case (CMD_FOR):
		/*
		 * We ignore all types of encodings and formats
		 * unilaterally.
		 */
		break;
	case (CMD_ENCODING):
		break;
	case (CMD_CUT):
		st->parsing = 0;
		return;
	default:
		abort();
	}

	/* Any command (but =cut) makes us start parsing. */
	st->parsing = 1;
}

/*
 * Print a verbatim paragraph, buf[start..end), as a literal display.
 * In the SYNOPSIS, leading "#include <file>" lines become "In"
 * macros instead; only what follows them goes into the display.
 * Nothing is printed while not parsing or while paused.
 * From the perspective of external callers,
 * always stays in OUST_NL/wantws mode.
 */
static void
verbatim(struct state *st, const char *buf, size_t start, size_t end)
{
	size_t		 pos;

	if (st->paused || ! st->parsing)
		return;

	/*
	 * Peel off any number of inclusions, but only as long as
	 * they come before the remaining parts.
	 */
	while (SECT_SYNOPSIS == st->sect) {
		pos = start;
		while (pos < end && ' ' == buf[pos])
			pos++;
		if (pos == end)
			return;

		/* Not an include line: the rest is literal text. */
		if (end - pos <= 10 ||
		    0 != memcmp(&buf[pos], "#include <", 10))
			break;

		start = pos + 10;
		while (start < end && ' ' == buf[start])
			start++;

		/* Copy the file name up to the '>' marker or eoln. */
		fputs(".In ", stdout);
		for ( ; start < end; start++) {
			if ('>' == buf[start] || '\n' == buf[start])
				break;
			putchar(buf[start]);
		}
		putchar('\n');

		if (start < end && '>' == buf[start])
			start++;
		if (start < end && '\n' == buf[start])
			start++;
		if (start >= end)
			return;
	}

	if (start == end)
		return;

	puts(".Bd -literal");
	last = ' ';
	while (start < end) {
		/*
		 * Neutralise accidental macros (a control character
		 * right after a newline) and escape backslashes.
		 */
		if ('\n' == last &&
		    ('.' == buf[start] || '\'' == buf[start]))
			printf("\\&");
		last = buf[start];
		putchar(last);
		if ('\\' == buf[start])
			printf("e");
		start++;
	}
	last = '\n';
	putchar(last);
	puts(".Ed");
}

/*
 * Helper for dosynopsisop(): scan buf[start..end) for the ']'
 * closing a bracket that was opened just before "start".
 * Nested "[...]" pairs inside the range are skipped.
 * Returns 1 if the closing bracket exists, 0 otherwise.
 */
static int
hasmatch(const char *buf, size_t start, size_t end)
{
	size_t	 depth, pos;

	depth = 0;
	for (pos = start; pos < end; pos++) {
		switch (buf[pos]) {
		case ('['):
			depth++;
			break;
		case (']'):
			if (0 == depth)
				return(1);
			depth--;
			break;
		default:
			break;
		}
	}
	return(0);
}

/*
 * If we're in the SYNOPSIS section and we've encountered brackets
 * in an ordinary paragraph, then try to see whether we're an
 * [-option].
 * Do this, if we're an opening bracket, by first seeing if we have a
 * matching end via hasmatch().
 * If we're an ending bracket, see if we have a stack already.
 *
 * buf[*start] must be '[' or ']'; "opstack" counts the optional
 * contexts currently open.
 * On success, prints "Oo" or "Oc" on a line of its own, moves
 * "start" past the bracket and any blanks following it (but never
 * beyond "end"), and returns 1.
 * Returns 0 without touching anything if the bracket is unmatched.
 */
static int
dosynopsisop(struct state *st, const char *buf,
	size_t *start, size_t end, size_t *opstack)
{

	assert('[' == buf[*start] || ']' == buf[*start]);

	if ('[' == buf[*start]) {
		if ( ! hasmatch(buf, *start + 1, end))
			return(0);
		mdoc_newln(st);
		puts(".Oo");
		(*opstack)++;
	} else {
		if (0 == *opstack)
			return(0);
		mdoc_newln(st);
		puts(".Oc");
		(*opstack)--;
	}

	(*start)++;
	last = '\n';
	/* Stay inside the paragraph while skipping blanks. */
	while (*start < end && ' ' == buf[*start])
		(*start)++;
	return(1);
}

/*
 * Format the comma-separated manpage names in buf[*start..end)
 * of the NAME section as one "Nm" line each; all but the last
 * line get the comma appended.
 * Prints ".Nm unknown" if the range holds nothing but blanks.
 * From the perspective of external callers,
 * always stays in OUST_NL/wantws mode,
 * but its children do use OUST_MAC.
 */
static void
donamenm(struct state *st, const char *buf, size_t *start, size_t end)
{
	const char	*comma;
	size_t		 stop;

	assert(OUST_NL == st->oust);
	assert(st->wantws);

	while (*start < end && ' ' == buf[*start])
		(*start)++;

	if (*start == end) {
		puts(".Nm unknown");
		return;
	}

	while (*start < end) {
		/* The name extends to the next comma or to the end. */
		comma = memchr(buf + *start, ',', end - *start);
		stop = NULL == comma ? end : (size_t)(comma - buf);

		formatcodeln(st, "Nm", buf, start, stop, 1);
		if (end == *start) {
			mdoc_newln(st);
			break;
		}

		/* More names follow: append the comma, skip blanks. */
		assert(',' == buf[*start]);
		printf(" ,");
		mdoc_newln(st);
		for ((*start)++; *start < end; (*start)++)
			if (' ' != buf[*start])
				break;
	}
}

/*
 * Ordinary paragraph, buf[start..end).
 * Well, this is really the hardest--POD seems to assume that, for
 * example, a leading space implies a newline, and so on.
 * Lots of other snakes in the grass: escaping a newline followed by a
 * period (accidental mdoc(7) control), double-newlines after macro
 * passages, etc.
 * Nothing is printed while not parsing or while paused.
 *
 * Uses formatcode() to go to OUST_MAC mode
 * and outbuf_flush() to go to OUST_TXT mode.
 * Main text mode wantws handling is in this function.
 * Must make sure to go back to OUST_NL/wantws mode before returning.
 */
static void
ordinary(struct state *st, const char *buf, size_t start, size_t end)
{
	size_t		i, j, opstack;
	int		seq;

	if ( ! st->parsing || st->paused)
		return;

	/*
	 * Special-case: the NAME section.
	 * If we find a "-" when searching from the end, assume that
	 * we're in "name - description" format.
	 * To wit, print out a "Nm" and "Nd" in that format.
	 *
	 * NOTE(review): "end - 2" steps below "start" for a paragraph
	 * shorter than two bytes, so buf[i] is then read in front of
	 * the paragraph -- confirm that this is harmless.
	 */
	if (SECT_NAME == st->sect) {
		for (i = end - 2; i > start; i--)
			if ('-' == buf[i] && ' ' == buf[i + 1])
				break;
		if ('-' == buf[i]) {
			/* j remembers the dash just before the description. */
			j = i;
			/* Roll over multiple "-". */
			for ( ; i > start; i--)
				if ('-' != buf[i])
					break;
			donamenm(st, buf, &start, i + 1);
			start = j + 1;
			while (start < end && ' ' == buf[start])
				start++;
			formatcodeln(st, "Nd", buf, &start, end, 1);
			mdoc_newln(st);
			return;
		}
	}

	/* Separate from the preceding paragraph unless suppressed. */
	if ( ! st->haspar)
		puts(".Pp");

	st->haspar = 0;
	last = '\n';
	opstack = 0;

	/* "seq" counts the passes; it is 0 only on the first one. */
	for (seq = 0; start < end; seq++) {
		/* 
		 * Loop til we get either to a newline or escape. 
		 * Escape initial control characters.
		 */
		while (start < end) {
			if (start < end - 1 && '<' == buf[start + 1] &&
			    'A' <= buf[start] && 'Z' >= buf[start])
				break;
			else if ('\n' == buf[start])
				break;
			else if ('\n' == last && '.' == buf[start])
				outbuf_addstr(st, "\\&");
			else if ('\n' == last && '\'' == buf[start])
				outbuf_addstr(st, "\\&");
			/*
			 * If we're in the SYNOPSIS, have square
			 * brackets indicate that we're opening and
			 * closing an optional context.
			 */

			if (SECT_SYNOPSIS == st->sect &&
				('[' == buf[start] || 
				 ']' == buf[start]) &&
				dosynopsisop(st, buf,
				    &start, end, &opstack))
				continue;

			/*
			 * On whitespace, flush the output buffer
			 * and allow breaking to a macro line.
			 * Otherwise, buffer text and clear wantws.
			 */

			last = buf[start++];
			if (' ' == last) {
				outbuf_flush(st);
				putchar(' ');
				st->wantws = 1;
			} else
				outbuf_addchar(st);
		}

		/* Stopped at a format code: seq is passed as its "pos". */
		if (start < end - 1 && '<' == buf[start + 1] &&
		    'A' <= buf[start] && 'Z' >= buf[start]) {
			formatcode(st, buf, &start, end, 0, seq);
			if (OUST_MAC == st->oust) {
				/*
				 * Let mdoc(7) handle trailing punctuation.
				 * XXX Some punctuation characters
				 *     are not handled yet.
				 */
				if ((start == end - 1 || 
					(start < end - 1 && 
					 (' ' == buf[start + 1] ||
					  '\n' == buf[start + 1]))) &&
					('.' == buf[start] ||
					 ',' == buf[start])) {
					putchar(' ');
					putchar(buf[start++]);
				}

				/*
				 * NOTE(review): buf[start] is read here
				 * without a "start < end" check -- confirm
				 * that buf[end] is always readable.
				 */
				if (st->wantws ||
				    ' ' == buf[start] ||
				    '\n' == buf[start])
					mdoc_newln(st);

				/*
				 * Consume all whitespace
				 * so we don't accidentally start
				 * an implicit literal line.
				 */

				while (start < end && ' ' == buf[start])
					start++;

				/*
				 * Some text is following.
				 * Implement requested spacing.
				 */

				if ( ! st->wantws && start < end &&
				    ('<' != buf[start + 1] ||
				     'A' > buf[start] ||
				     'Z' < buf[start])) {
					printf(" Ns ");
					st->wantws = 1;
				}
			}
		} else if (start < end && '\n' == buf[start]) {
			/* Stopped at a newline: end the output line, too. */
			outbuf_flush(st);
			mdoc_newln(st);
			if (++start >= end)
				continue;
			/*
			 * If we have whitespace next, eat it to prevent
			 * mdoc(7) from thinking that it's meant for
			 * verbatim text.
			 * It is--but if we start with that, we can't
			 * have a macro subsequent it, which may be
			 * possible if we have an escape next.
			 */
			if (' ' == buf[start] || '\t' == buf[start])
				puts(".br");
			for ( ; start < end; start++)
				if (' ' != buf[start] && '\t' != buf[start])
					break;
		} 
	}
	/* Go back to OUST_NL/wantws mode. */
	outbuf_flush(st);
	mdoc_newln(st);
}

/*
 * Dispatch one paragraph, buf[start..end), by its first character.
 * There are three kinds of paragraphs: verbatim (starts with a
 * blank or tab), command (starts with "="), and ordinary
 * (everything else).
 * An empty paragraph is ignored.
 * Must be entered in OUST_NL/wantws mode.
 */
static void
dopar(struct state *st, const char *buf, size_t start, size_t end)
{

	assert(OUST_NL == st->oust);
	assert(st->wantws);

	if (start == end)
		return;

	switch (buf[start]) {
	case (' '):
		/* FALLTHROUGH */
	case ('\t'):
		verbatim(st, buf, start, end);
		break;
	case ('='):
		command(st, buf, start, end);
		break;
	default:
		ordinary(st, buf, start, end);
		break;
	}
}

/*
 * Loop around paragraphs within a document, processing each one in the
 * POD way.
 * Before the first paragraph, print the mdoc(7) preamble (Dd, Dt, Os).
 * The title and the section are taken from "args" where given there;
 * otherwise, the title is the last path component of "fname" without
 * its last suffix, and the section is PERL_SECTION for the suffix
 * "pm" and "1" for anything else.
 * The date is taken from "args" or else formatted from "tm".
 * For an empty buffer, nothing at all is printed.
 * Exits if memory for the title cannot be allocated.
 */
static void
dofile(const struct args *args, const char *fname, 
	const struct tm *tm, const char *buf, size_t sz)
{
	char		 datebuf[64];
	struct state	 st;
	const char	*fbase, *fext, *section, *date;
	char		*title, *cp;
	size_t		 sup, end, i, cur = 0;

	if (0 == sz)
		return;

	/*
	 * Parsing the filename is almost always required,
	 * except when both the title and the section
	 * are provided on the command line.
	 */

	if (NULL == args->title || NULL == args->section) {
		fbase = strrchr(fname, '/');
		if (NULL == fbase)
			fbase = fname;
		else
			fbase++;
		fext = strrchr(fbase, '.');
	} else {
		/* Not consulted below, but never leave it unset. */
		fbase = fname;
		fext = NULL;
	}

	/*
	 * The title will be converted to uppercase,
	 * so it needs to be copied.
	 */

	title = (NULL != args->title) ? strdup(args->title) :
		(NULL != fext) ? strndup(fbase, fext - fbase) :
		strdup(fbase);

	if (NULL == title) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	/* Section is 1 unless suffix is "pm". */

	section = (NULL != args->section) ? args->section :
	    (NULL == fext || strcmp(fext + 1, "pm")) ? "1" :
	    PERL_SECTION;

	/*
	 * Date.  Or the given "tm" if not supplied.
	 * Without a usable "tm", or if strftime(3) fails, the
	 * buffer content would be indeterminate, so print an
	 * empty date in that case.
	 */

	if (NULL == (date = args->date)) {
		if (NULL == tm || 0 == strftime(datebuf,
		    sizeof(datebuf), "%B %d, %Y", tm))
			datebuf[0] = '\0';
		date = datebuf;
	}

	/*
	 * The <ctype.h> functions are only defined for values
	 * representable as unsigned char (and EOF), so do not
	 * pass a possibly negative plain char.
	 */

	for (cp = title; '\0' != *cp; cp++)
		*cp = toupper((unsigned char)*cp);

	/* The usual mdoc(7) preamble. */

	printf(".Dd %s\n", date);
	printf(".Dt %s %s\n", title, section);
	puts(".Os");

	free(title);

	/* Start on a fresh output line, with whitespace wanted. */

	memset(&st, 0, sizeof(struct state));
	st.oust = OUST_NL;
	st.wantws = 1;

	/* Main loop over file contents. */

	while (cur < sz) {
		/*
		 * Read until next paragraph.
		 * On a match, "i" ends up on the last newline of
		 * the run of newlines terminating the paragraph;
		 * without a match, it ends up at "sz".
		 */
		for (i = cur + 1; i < sz; i++)
			if ('\n' == buf[i] && '\n' == buf[i - 1]) {
				/* Consume blank paragraphs. */
				while (i + 1 < sz && '\n' == buf[i + 1])
					i++;
				break;
			}
		
		/*
		 * Adjust end marker for EOF, where one trailing
		 * newline is stripped.
		 * "end" is exclusive; "sup" is where the next
		 * paragraph starts, just after the newline at "i".
		 */
		end = i < sz ? i - 1 : 
			('\n' == buf[sz - 1] ? sz - 1 : sz);
		sup = i < sz ? end + 2 : sz;

		/* Process paragraph and adjust start. */
		dopar(&st, buf, cur, end);
		cur = sup;
	}
}

/*
 * Read a single file fully into memory.
 * If the file is "-", do it from stdin.
 * If successfully read, send the input buffer to dofile() for further
 * processing, together with the broken-down modification time of the
 * file, or with the current time for stdin and when fstat(2) fails.
 * Returns 1 on success or 0 if the file cannot be opened or read.
 * Exits if memory runs out.
 */
static int
readfile(const struct args *args, const char *fname)
{
	int		 fd;
	char		*buf;
	size_t		 bufsz, cur;
	ssize_t		 ssz;
	struct tm	*tm;
	time_t		 ttm;
	struct stat 	 st;

	fd = 0 != strcmp("-", fname) ? 
		open(fname, O_RDONLY, 0) : STDIN_FILENO;

	if (-1 == fd) {
		perror(fname);
		return(0);
	}

	if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
		ttm = time(NULL);
		tm = localtime(&ttm);
	} else
		tm = localtime(&st.st_mtime);

	/* 
	 * Arbitrarily-sized initial buffer.
	 * Should be big enough for most files...
	 */
	cur = 0;
	bufsz = 1 << 14;
	if (NULL == (buf = malloc(bufsz))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
		/*
		 * Double buffer size on fill, such that the
		 * next read(2) always has some room left.
		 * Overwriting "buf" is fine: failure is fatal.
		 */
		if ((size_t)ssz == bufsz - cur)  {
			bufsz *= 2;
			if (NULL == (buf = realloc(buf, bufsz))) {
				perror(NULL);
				exit(EXIT_FAILURE);
			}
		}
		cur += (size_t)ssz;
	}
	if (ssz < 0) {
		/* Report first: cleaning up may clobber errno. */
		perror(fname);
		free(buf);
		/* Do not leak the descriptor we opened. */
		if (STDIN_FILENO != fd)
			close(fd);
		return(0);
	}

	dofile(args, STDIN_FILENO == fd ? 
		"STDIN" : fname, tm, buf, cur);
	free(buf);
	if (STDIN_FILENO != fd)
		close(fd);
	return(1);
}

/*
 * Parse the command line and convert at most one input file,
 * reading from standard input ("-") if no file is named.
 * The options -d, -n, and -s override date, title, and section;
 * -c, -h, -l, -o, -q, -r, -u, and -v are accepted but ignored.
 * Returns EXIT_SUCCESS if readfile() succeeds or EXIT_FAILURE
 * otherwise, including after a usage error.
 */
int
main(int argc, char *argv[])
{
	struct args	 args;
	const char	*progname, *slash;
	int		 ch;

	/* Only use the last path component in the usage message. */

	slash = strrchr(argv[0], '/');
	progname = (NULL != slash) ? slash + 1 : argv[0];

	memset(&args, 0, sizeof(struct args));

	while (-1 != (ch = getopt(argc, argv, "c:d:hln:oq:rs:uv"))) {
		switch (ch) {
		case 'd':
			args.date = optarg;
			break;
		case 'n':
			args.title = optarg;
			break;
		case 's':
			args.section = optarg;
			break;
		case 'c':
			/* FALLTHROUGH */
		case 'h':
			/* FALLTHROUGH */
		case 'l':
			/* FALLTHROUGH */
		case 'o':
			/* FALLTHROUGH */
		case 'q':
			/* FALLTHROUGH */
		case 'r':
			/* FALLTHROUGH */
		case 'u':
			/* FALLTHROUGH */
		case 'v':
			/* Accepted, but without any effect. */
			break;
		default:
			goto usage;
		}
	}

	argv += optind;
	argc -= optind;

	/* Zero or one input file; more than that is an error. */

	if (argc > 1)
		goto usage;

	if (readfile(&args, 1 == argc ? argv[0] : "-"))
		return(EXIT_SUCCESS);

	return(EXIT_FAILURE);

usage:
	fprintf(stderr, "usage: %s [-d date] "
	    "[-n title] [-s section] [file]\n", progname);

	return(EXIT_FAILURE);
}