===================================================================
RCS file: /cvs/pod2mdoc/pod2mdoc.c,v
retrieving revision 1.19
retrieving revision 1.29
diff -u -p -r1.19 -r1.29
--- pod2mdoc/pod2mdoc.c	2014/04/03 10:17:14	1.19
+++ pod2mdoc/pod2mdoc.c	2014/07/11 20:45:55	1.29
@@ -1,4 +1,4 @@
-/*	$Id: pod2mdoc.c,v 1.19 2014/04/03 10:17:14 kristaps Exp $ */
+/*	$Id: pod2mdoc.c,v 1.29 2014/07/11 20:45:55 schwarze Exp $ */
 /*
  * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
  *
@@ -179,18 +179,25 @@ formatescape(const char *buf, size_t *start, size_t en
 static int
 trylink(const char *buf, size_t *start, size_t end, size_t dsz)
 {
-	size_t		 linkstart, realend, linkend, i, j, textsz;
-	const char	*text;
+	size_t		 linkstart, realend, linkend, 
+			 i, j, textsz, stack;
 
 	/* 
 	 * Scan to the start of the terminus. 
 	 * This function is more or less replicated in the formatcode()
 	 * for null or index formatting codes.
+	 * However, we're slightly different because we might have
+	 * nested escapes we need to ignore.
 	 */
+	stack = 0;
 	for (linkstart = realend = *start; realend < end; realend++) {
+		if ('<' == buf[realend])
+			stack++;
 		if ('>' != buf[realend])
 			continue;
-		else if (dsz == 1)
+		else if (stack-- > 0)
+			continue;
+		if (dsz == 1)
 			break;
 		assert(realend > 0);
 		if (' ' != buf[realend - 1])
@@ -210,13 +217,12 @@ trylink(const char *buf, size_t *start, size_t end, si
 	linkend = dsz > 1 ? realend - 1 : realend;
 
 	/* Re-scan to see if we have a title or section. */
-	text = &buf[*start];
 	for (textsz = *start; textsz < linkend; textsz++)
 		if ('|' == buf[textsz] || '/' == buf[textsz])
 			break;
 
-	/* If we have a title, find the section. */
 	if (textsz < linkend && '|' == buf[textsz]) {
+		/* With title: set start, then end at section. */
 		linkstart = textsz + 1;
 		textsz = textsz - *start;
 		for (i = linkstart; i < linkend; i++)
@@ -224,32 +230,43 @@ trylink(const char *buf, size_t *start, size_t end, si
 				break;
 		if (i < linkend)
 			linkend = i;
-	} else {
+	} else if (textsz < linkend && '/' == buf[textsz]) {
+		/* With section: set end at section. */
+		linkend = textsz;
 		textsz = 0;
-		if (textsz < linkend && '/' == buf[textsz])
-			linkend = textsz;
-	}
+	} else
+		/* No title, no section. */
+		textsz = 0;
 
 	*start = realend;
-
 	j = linkend - linkstart;
 
-	if (0 == j)
+	/* Do we have only subsection material? */
+	if (0 == j && '/' == buf[linkend]) {
+		linkstart = linkend + 1;
+		linkend = dsz > 1 ? realend - 1 : realend;
+		if (0 == (j = linkend - linkstart))
+			return(0);
+		printf("Sx %.*s", (int)j, &buf[linkstart]);
+		return(1);
+	} else if (0 == j)
 		return(0);
 
 	/* See if we qualify as being a link or not. */
-	if ((j > 5 && 0 == memcmp("http:", &buf[linkstart], j)) ||
-		(j > 6 && 0 == memcmp("https:", &buf[linkstart], j)) ||
-		(j > 4 && 0 == memcmp("ftp:", &buf[linkstart], j)) ||
-		(j > 5 && 0 == memcmp("sftp:", &buf[linkstart], j)) ||
-		(j > 4 && 0 == memcmp("smb:", &buf[linkstart], j)) ||
-		(j > 4 && 0 == memcmp("afs:", &buf[linkstart], j))) {
-		printf("Lk %.*s", (int)j, &buf[linkstart]);
+	if ((j > 4 && 0 == memcmp("http:", &buf[linkstart], j)) ||
+		(j > 5 && 0 == memcmp("https:", &buf[linkstart], j)) ||
+		(j > 3 && 0 == memcmp("ftp:", &buf[linkstart], j)) ||
+		(j > 4 && 0 == memcmp("sftp:", &buf[linkstart], j)) ||
+		(j > 3 && 0 == memcmp("smb:", &buf[linkstart], j)) ||
+		(j > 3 && 0 == memcmp("afs:", &buf[linkstart], j))) {
+		/* Gross: linkend was cut at the first '/'; print to the real end. */
+		printf("Lk %.*s", (int)((dsz > 1 ? realend - 1 : 
+			realend) - linkstart), &buf[linkstart]);
 		return(1);
 	} 
 	
 	/* See if we qualify as a mailto. */
-	if (j > 7 && 0 == memcmp("mailto:", &buf[linkstart], j)) {
+	if (j > 6 && 0 == memcmp("mailto:", &buf[linkstart], j)) {
 		printf("Mt %.*s", (int)j, &buf[linkstart]);
 		return(1);
 	}
@@ -441,6 +458,13 @@ formatcode(struct state *st, const char *buf, size_t *
 			(*start) += dsz;
 			break;
 		}
+		if (*start < end) {
+			assert('>' == buf[*start]);
+			(*start)++;
+		}
+		if (isspace(last))
+			while (*start < end && isspace((int)buf[*start]))
+				(*start)++;
 		return(0);
 	}
 
@@ -485,7 +509,12 @@ formatcode(struct state *st, const char *buf, size_t *
 					printf("Ar ");
 				break;
 			} 
-			printf("Sy ");
+			if (0 == strncmp(buf + *start, "NULL", 4) &&
+			    ('=' == buf[*start + 4] ||
+			     '>' == buf[*start + 4]))
+				printf("Dv ");
+			else
+				printf("Sy ");
 			break;
 		case (FMT_CODE):
 			printf("Qo Li ");
@@ -837,10 +866,47 @@ static void
 verbatim(struct state *st, const char *buf, size_t start, size_t end)
 {
 	int		 last;
+	size_t		 i;
 
 	if ( ! st->parsing || st->paused)
 		return;
-
+again:
+	/* 
+	 * If we're in the SYNOPSIS, see if we're an #include block.
+	 * If we are, then print the "In" macro and re-loop.
+	 * This handles any number of inclusions, but only when they
+	 * come before the remaining parts...
+	 */
+	if (SECT_SYNOPSIS == st->sect) {
+		i = start;
+		for (i = start; i < end && ' ' == buf[i]; i++)
+			/* Spin. */ ;
+		if (i == end)
+			return;
+		/* We're an include block! */
+		if (end - i > 10 && 
+			0 == memcmp(&buf[i], "#include <", 10)) {
+			start = i + 10;
+			while (start < end && ' ' == buf[start])
+				start++;
+			fputs(".In ", stdout);
+			/* Print until the '>' marker or the end of the line. */
+			while (start < end && 
+				'>' != buf[start] && '\n' != buf[start])
+				putchar(buf[start++]);
+			putchar('\n');
+			if (start < end && '>' == buf[start])
+				start++;
+			if (start < end && '\n' == buf[start])
+				start++;
+			if (start < end) 
+				goto again;
+			return;
+		}
+	}
+	
+	if (start == end)
+		return;
 	puts(".Bd -literal");
 	for (last = ' '; start < end; start++) {
 		/*
@@ -1112,24 +1178,40 @@ static void
 dofile(const struct args *args, const char *fname, 
 	const struct tm *tm, const char *buf, size_t sz)
 {
-	size_t		 sup, end, i, cur = 0;
-	struct state	 st;
-	const char	*section, *date;
 	char		 datebuf[64];
+	struct state	 st;
+	const char	*fbase, *fext, *section, *date;
 	char		*title, *cp;
+	size_t		 sup, end, i, cur = 0;
 
 	if (0 == sz)
 		return;
 
-	/* Title is last path component of the filename. */
+	/*
+	 * Parsing the filename is almost always required,
+	 * except when both the title and the section
+	 * are provided on the command line.
+	 */
 
-	if (NULL != args->title)
-		title = strdup(args->title);
-	else if (NULL != (cp = strrchr(fname, '/')))
-		title = strdup(cp + 1);
-	else
-		title = strdup(fname);
-	
+	if (NULL == args->title || NULL == args->section) {
+		fbase = strrchr(fname, '/');
+		if (NULL == fbase)
+			fbase = fname;
+		else
+			fbase++;
+		fext = strrchr(fbase, '.');
+	} else
+		fext = NULL;
+
+	/*
+	 * The title will be converted to uppercase,
+	 * so it needs to be copied.
+	 */
+
+	title = (NULL != args->title) ? strdup(args->title) :
+		(NULL != fext) ? strndup(fbase, fext - fbase) :
+		strdup(fbase);
+
 	if (NULL == title) {
 		perror(NULL);
 		exit(EXIT_FAILURE);
@@ -1137,14 +1219,9 @@ dofile(const struct args *args, const char *fname, 
 
 	/* Section is 1 unless suffix is "pm". */
 
-	if (NULL == (section = args->section)) {
-		section = "1";
-		if (NULL != (cp = strrchr(title, '.'))) {
-			*cp++ = '\0';
-			if (0 == strcmp(cp, "pm"))
-				section = PERL_SECTION;
-		}
-	} 
+	section = (NULL != args->section) ? args->section :
+	    (NULL == fext || strcmp(fext + 1, "pm")) ? "1" :
+	    PERL_SECTION;
 
 	/* Date.  Or the given "tm" if not supplied. */
 
@@ -1207,8 +1284,6 @@ readfile(const struct args *args, const char *fname)
 	time_t		 ttm;
 	struct stat 	 st;
 
-	assert(NULL != fname);
-
 	fd = 0 != strcmp("-", fname) ? 
 		open(fname, O_RDONLY, 0) : STDIN_FILENO;
 
@@ -1314,8 +1389,8 @@ main(int argc, char *argv[])
 
 	/* Accept only a single input file. */
 
-	if (argc > 2)
-		return(EXIT_FAILURE);
+	if (argc > 1)
+		goto usage;
 	else if (1 == argc)
 		fname = *argv;
 
@@ -1324,7 +1399,7 @@ main(int argc, char *argv[])
 
 usage:
 	fprintf(stderr, "usage: %s [-d date] " 
-		"[-n title] [-s section]\n", name);
+	    "[-n title] [-s section] [file]\n", name);
 
 	return(EXIT_FAILURE);
 }