===================================================================
RCS file: /cvs/mandoc/Attic/mdocterm.c,v
retrieving revision 1.25
retrieving revision 1.36
diff -u -p -r1.25 -r1.36
--- mandoc/Attic/mdocterm.c	2009/03/02 17:14:46	1.25
+++ mandoc/Attic/mdocterm.c	2009/03/08 18:02:36	1.36
@@ -1,4 +1,4 @@
-/* $Id: mdocterm.c,v 1.25 2009/03/02 17:14:46 kristaps Exp $ */
+/* $Id: mdocterm.c,v 1.36 2009/03/08 18:02:36 kristaps Exp $ */
 /*
  * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
  *
@@ -16,8 +16,6 @@
  * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
  * PERFORMANCE OF THIS SOFTWARE.
  */
-#include <sys/utsname.h>
-
 #include <assert.h>
 #include <ctype.h>
 #include <err.h>
@@ -32,6 +30,11 @@
 #include "mmain.h"
 #include "term.h"
 
+struct	termenc {
+	const char	 *enc;	/* escape name that nescape() compares against the input */
+	int		  sym;	/* symbol value handed to symbola() on a match */
+};
+
 static	void		  body(struct termp *,
 				struct termpair *,
 				const struct mdoc_meta *,
@@ -44,19 +47,82 @@ static	void		  footer(struct termp *,
 static	void		  pword(struct termp *, const char *, size_t);
 static	void		  pescape(struct termp *, const char *, 
 				size_t *, size_t);
-static	void		  style(struct termp *, enum tstyle);
 static	void		  nescape(struct termp *,
 				const char *, size_t);
 static	void		  chara(struct termp *, char);
 static	void		  stringa(struct termp *, 
 				const char *, size_t);
 static	void		  symbola(struct termp *, enum tsym);
+static	void		  stylea(struct termp *, enum tstyle);
 
 #ifdef __linux__
 extern	size_t		  strlcat(char *, const char *, size_t);
 extern	size_t		  strlcpy(char *, const char *, size_t);
 #endif
 
+static	struct termenc	  termenc1[] = {	/* 1-byte escape names; nescape() searches this when len == 1 */
+	{ "\\",		  TERMSYM_SLASH },
+	{ "\'",		  TERMSYM_RSQUOTE },
+	{ "`",		  TERMSYM_LSQUOTE },
+	{ "-",		  TERMSYM_HYPHEN },
+	{ " ",		  TERMSYM_SPACE },
+	{ ".",		  TERMSYM_PERIOD },
+	{ "&",		  TERMSYM_BREAK },	/* zero-width: an empty string in termsym_ansi */
+	{ "e",		  TERMSYM_SLASH },	/* same symbol as the "\\" entry above */
+	{ "q",		  TERMSYM_DQUOTE },
+	{ NULL,		  0 }	/* sentinel: the lookup loop stops at a NULL name */
+};
+
+static	struct termenc	  termenc2[] = {	/* 2-byte escape names; nescape() searches this when len == 2 */
+	{ "rB", 	  TERMSYM_RBRACK },
+	{ "lB", 	  TERMSYM_LBRACK },
+	{ "ra", 	  TERMSYM_RANGLE },
+	{ "la", 	  TERMSYM_LANGLE },
+	{ "Lq", 	  TERMSYM_LDQUOTE },	/* deprecated form */
+	{ "lq", 	  TERMSYM_LDQUOTE },
+	{ "Rq", 	  TERMSYM_RDQUOTE },	/* deprecated form */
+	{ "rq", 	  TERMSYM_RDQUOTE },
+	{ "oq", 	  TERMSYM_LSQUOTE },
+	{ "aq", 	  TERMSYM_RSQUOTE },
+
+	{ "<-", 	  TERMSYM_LARROW },
+	{ "->", 	  TERMSYM_RARROW },
+	{ "ua", 	  TERMSYM_UARROW },
+	{ "da", 	  TERMSYM_DARROW },
+
+	{ "bu", 	  TERMSYM_BULLET },
+	{ "Ba", 	  TERMSYM_BAR },	/* deprecated form */
+	{ "ba", 	  TERMSYM_BAR },
+	{ "co", 	  TERMSYM_COPY },
+	{ "Am", 	  TERMSYM_AMP },	/* deprecated form */
+
+	{ "Le", 	  TERMSYM_LE },	/* deprecated form */
+	{ "<=", 	  TERMSYM_LE },
+	{ "Ge", 	  TERMSYM_GE },	/* deprecated form */
+	{ ">=", 	  TERMSYM_GE },
+	{ "==", 	  TERMSYM_EQ },
+	{ "Ne", 	  TERMSYM_NEQ },	/* deprecated form */
+	{ "!=", 	  TERMSYM_NEQ },
+	{ "Pm", 	  TERMSYM_PLUSMINUS },	/* deprecated form */
+	{ "+-", 	  TERMSYM_PLUSMINUS },
+	{ "If", 	  TERMSYM_INF2 },	/* deprecated form */
+	{ "if", 	  TERMSYM_INF },
+	{ "Na", 	  TERMSYM_NAN },	/* deprecated form */
+	{ "na", 	  TERMSYM_NAN },
+	{ "**", 	  TERMSYM_ASTERISK },
+	{ "Gt", 	  TERMSYM_GT },	/* deprecated form */
+	{ "Lt", 	  TERMSYM_LT },	/* deprecated form */
+
+	{ "aa", 	  TERMSYM_ACUTE },
+	{ "ga", 	  TERMSYM_GRAVE },
+
+	{ "en", 	  TERMSYM_EN },
+	{ "em", 	  TERMSYM_EM },
+
+	{ "Pi", 	  TERMSYM_PI },	/* deprecated form */
+	{ NULL,		  0 }	/* sentinel: the lookup loop stops at a NULL name */
+};
+
 static	struct termsym	  termsym_ansi[] = {
 	{ "]", 1 },		/* TERMSYM_RBRACK */
 	{ "[", 1 },		/* TERMSYM_LBRACK */
@@ -85,8 +151,18 @@ static	struct termsym	  termsym_ansi[] = {
 	{ "NaN", 3 },		/* TERMSYM_NAN */
 	{ "|", 1 },		/* TERMSYM_BAR */
 	{ "o", 1 },		/* TERMSYM_BULLET */
-	{ "&", 1 },		/* TERMSYM_AND */
-	{ "|", 1 },		/* TERMSYM_OR */
+	{ "&", 1 },		/* TERMSYM_AMP */
+	{ "--", 2 },		/* TERMSYM_EM */
+	{ "-", 1 },		/* TERMSYM_EN */
+	{ "(C)", 3 },		/* TERMSYM_COPY */
+	{ "*", 1 },		/* TERMSYM_ASTERISK */
+	{ "\\", 1 },		/* TERMSYM_SLASH */
+	{ "-", 1 },		/* TERMSYM_HYPHEN */
+	{ " ", 1 },		/* TERMSYM_SPACE */
+	{ ".", 1 },		/* TERMSYM_PERIOD */
+	{ "", 0 },		/* TERMSYM_BREAK */
+	{ "<", 1 },		/* TERMSYM_LANGLE */
+	{ ">", 1 },		/* TERMSYM_RANGLE */
 };
 
 static	const char	  ansi_clear[]  = { 27, '[', '0', 'm' };
@@ -103,20 +179,21 @@ static	struct termsym	  termstyle_ansi[] = {
 int
 main(int argc, char *argv[])
 {
-	struct mmain	*p;
+	struct mmain	  *p;
+	int		   c;
 	const struct mdoc *mdoc;
-	struct termp	 termp;
+	struct termp	   termp;
 
 	p = mmain_alloc();
 
-	if ( ! mmain_getopt(p, argc, argv, NULL, NULL, NULL, NULL))
-		mmain_exit(p, 1);
+	c = mmain_getopt(p, argc, argv, NULL, NULL, NULL, NULL);
+	if (1 != c)
+		mmain_exit(p, -1 == c ? 1 : 0);
 
 	if (NULL == (mdoc = mmain_mdoc(p)))
 		mmain_exit(p, 1);
 
-	termp.maxrmargin = 78; /* XXX */
-	termp.rmargin = termp.maxrmargin;
+	termp.maxrmargin = termp.rmargin = 78; /* XXX */
 	termp.maxcols = 1024;
 	termp.offset = termp.col = 0;
 	termp.flags = TERMP_NOSPACE;
@@ -153,9 +230,6 @@ main(int argc, char *argv[])
  *    offset value.  This is useful when doing columnar lists where the
  *    prior column has right-padded.
  *
- *  - TERMP_LITERAL: don't break apart words.  Note that a long literal
- *    word will violate the right margin.
- *
  *  - TERMP_NOBREAK: this is the most important and is used when making
  *    columns.  In short: don't print a newline and instead pad to the
  *    right margin.  Used in conjunction with TERMP_NOLPAD.
@@ -213,7 +287,7 @@ flushln(struct termp *p)
 
 		/* LINTED */
 		for (j = i, vsz = 0; j < p->col; j++) {
-			if (isspace((int)p->buf[j]))
+			if (isspace((u_char)p->buf[j]))
 				break;
 			else if (27 == p->buf[j]) {
 				assert(j + 4 <= p->col);
@@ -246,7 +320,7 @@ flushln(struct termp *p)
 				putchar('\n');
 				for (j = 0; j < p->rmargin; j++)
 					putchar(' ');
-				vis = p->rmargin;
+				vis = p->rmargin - p->offset;
 			} else if (vis + vsz > bp) 
 				warnx("word breaks right margin");
 
@@ -260,7 +334,7 @@ flushln(struct termp *p)
 		 */
 
 		for ( ; i < p->col; i++) {
-			if (isspace((int)p->buf[i]))
+			if (isspace((u_char)p->buf[i]))
 				break;
 			putchar(p->buf[i]);
 		}
@@ -277,9 +351,11 @@ flushln(struct termp *p)
 	 */
 
 	if ((TERMP_NOBREAK & p->flags) && vis >= maxvis) {
-		putchar('\n');
-		for (i = 0; i < p->rmargin; i++)
-			putchar(' ');
+		if ( ! (TERMP_NONOBREAK & p->flags)) {
+			putchar('\n');
+			for (i = 0; i < p->rmargin; i++)
+				putchar(' ');
+		}
 		p->col = 0;
 		return;
 	}
@@ -290,8 +366,9 @@ flushln(struct termp *p)
 	 */
 
 	if (p->flags & TERMP_NOBREAK) {
-		for ( ; vis < maxvis; vis++)
-			putchar(' ');
+		if ( ! (TERMP_NONOBREAK & p->flags))
+			for ( ; vis < maxvis; vis++)
+				putchar(' ');
 	} else
 		putchar('\n');
 
@@ -360,13 +437,13 @@ word(struct termp *p, const char *word)
 
 	/* LINTED */
 	for (j = i = 0; i < len; i++) {
-		if ( ! isspace((int)word[i])) {
+		if ( ! isspace((u_char)word[i])) {
 			j++;
 			continue;
 		} 
 		
 		/* Escaped spaces don't delimit... */
-		if (i > 0 && isspace((int)word[i]) && 
+		if (i > 0 && isspace((u_char)word[i]) && 
 				'\\' == word[i - 1]) {
 			j++;
 			continue;
@@ -413,7 +490,7 @@ body(struct termp *p, struct termpair *ppair,
 			if ( ! (*termacts[node->tok].pre)(p, &pair, meta, node))
 				dochild = 0;
 	} else /* MDOC_TEXT == node->type */
-		word(p, node->data.text.string);
+		word(p, node->string);
 
 	/* Children. */
 
@@ -493,9 +570,7 @@ footer(struct termp *p, const struct mdoc_meta *meta)
 static void
 header(struct termp *p, const struct mdoc_meta *meta)
 {
-	char		*buf, *title, *bufp, *vbuf;
-	const char	*pp;
-	struct utsname	 uts;
+	char		*buf, *title, *bufp;
 
 	p->rmargin = p->maxrmargin;
 	p->offset = 0;
@@ -504,49 +579,7 @@ header(struct termp *p, const struct mdoc_meta *meta)
 		err(1, "malloc");
 	if (NULL == (title = malloc(p->rmargin)))
 		err(1, "malloc");
-	if (NULL == (vbuf = malloc(p->rmargin)))
-		err(1, "malloc");
 
-	if (NULL == (pp = mdoc_vol2a(meta->vol))) {
-		switch (meta->msec) {
-		case (MSEC_1):
-			/* FALLTHROUGH */
-		case (MSEC_6):
-			/* FALLTHROUGH */
-		case (MSEC_7):
-			pp = mdoc_vol2a(VOL_URM);
-			break;
-		case (MSEC_8):
-			pp = mdoc_vol2a(VOL_SMM);
-			break;
-		case (MSEC_2):
-			/* FALLTHROUGH */
-		case (MSEC_3):
-			/* FALLTHROUGH */
-		case (MSEC_4):
-			/* FALLTHROUGH */
-		case (MSEC_5):
-			pp = mdoc_vol2a(VOL_PRM);
-			break;
-		case (MSEC_9):
-			pp = mdoc_vol2a(VOL_KM);
-			break;
-		default:
-			break;
-		}
-	}
-	vbuf[0] = 0;
-
-	if (pp) {
-		if (-1 == uname(&uts)) 
-			err(1, "uname");
-		(void)strlcat(vbuf, uts.sysname, p->rmargin);
-		(void)strlcat(vbuf, " ", p->rmargin);
-	} else if (NULL == (pp = mdoc_msec2a(meta->msec)))
-		pp = mdoc_msec2a(MSEC_local);
-
-	(void)strlcat(vbuf, pp, p->rmargin);
-
 	/*
 	 * The header is strange.  It has three components, which are
 	 * really two with the first duplicated.  It goes like this:
@@ -560,19 +593,20 @@ header(struct termp *p, const struct mdoc_meta *meta)
 	 * switches on the manual section.
 	 */
 
-	if (mdoc_arch2a(meta->arch))
-		(void)snprintf(buf, p->rmargin, "%s (%s)",
-				vbuf, mdoc_arch2a(meta->arch));
-	else
-		(void)strlcpy(buf, vbuf, p->rmargin);
+	assert(meta->vol);
+	(void)strlcpy(buf, meta->vol, p->rmargin);
 
-	pp = mdoc_msec2a(meta->msec);
+	if (meta->arch) {
+		(void)strlcat(buf, " (", p->rmargin);
+		(void)strlcat(buf, meta->arch, p->rmargin);
+		(void)strlcat(buf, ")", p->rmargin);
+	}
 
-	(void)snprintf(title, p->rmargin, "%s(%s)",
-			meta->title, pp ? pp : "");
+	(void)snprintf(title, p->rmargin, "%s(%d)", 
+			meta->title, meta->msec);
 
 	for (bufp = title; *bufp; bufp++)
-		*bufp = toupper(*bufp);
+		*bufp = toupper((u_char)*bufp);
 	
 	p->offset = 0;
 	p->rmargin = (p->maxrmargin - strlen(buf)) / 2;
@@ -601,7 +635,6 @@ header(struct termp *p, const struct mdoc_meta *meta)
 	p->flags &= ~TERMP_NOSPACE;
 
 	free(title);
-	free(vbuf);
 	free(buf);
 }
 
@@ -614,149 +647,27 @@ header(struct termp *p, const struct mdoc_meta *meta)
 static void
 nescape(struct termp *p, const char *word, size_t len)
 {
+	struct termenc	*enc;
 
 	switch (len) {
 	case (1):
-		switch (word[0]) {
-		case ('\\'):
-			/* FALLTHROUGH */
-		case ('\''):
-			/* FALLTHROUGH */
-		case ('`'):
-			/* FALLTHROUGH */
-		case ('-'):
-			/* FALLTHROUGH */
-		case (' '):
-			/* FALLTHROUGH */
-		case ('.'):
-			chara(p, word[0]); /* FIXME */
-			break;
-		case ('&'):
-			break;
-		case ('e'):
-			chara(p, '\\'); /* FIXME */
-			break;
-		case ('q'):
-			symbola(p, TERMSYM_DQUOTE);
-			break;
-		default:
-			warnx("escape sequence not supported: %c",
-					word[0]);
-			break;
-		}
+		enc = termenc1;
 		break;
-
 	case (2):
-		if ('r' == word[0] && 'B' == word[1])
-			symbola(p, TERMSYM_RBRACK);
-		else if ('l' == word[0] && 'B' == word[1])
-			symbola(p, TERMSYM_LBRACK);
-		else if ('l' == word[0] && 'q' == word[1])
-			symbola(p, TERMSYM_LDQUOTE);
-		else if ('r' == word[0] && 'q' == word[1])
-			symbola(p, TERMSYM_RDQUOTE);
-		else if ('o' == word[0] && 'q' == word[1])
-			symbola(p, TERMSYM_LSQUOTE);
-		else if ('a' == word[0] && 'q' == word[1])
-			symbola(p, TERMSYM_RSQUOTE);
-		else if ('<' == word[0] && '-' == word[1])
-			symbola(p, TERMSYM_LARROW);
-		else if ('-' == word[0] && '>' == word[1])
-			symbola(p, TERMSYM_RARROW);
-		else if ('b' == word[0] && 'u' == word[1])
-			symbola(p, TERMSYM_BULLET);
-		else if ('<' == word[0] && '=' == word[1])
-			symbola(p, TERMSYM_LE);
-		else if ('>' == word[0] && '=' == word[1])
-			symbola(p, TERMSYM_GE);
-		else if ('=' == word[0] && '=' == word[1])
-			symbola(p, TERMSYM_EQ);
-		else if ('+' == word[0] && '-' == word[1])
-			symbola(p, TERMSYM_PLUSMINUS);
-		else if ('u' == word[0] && 'a' == word[1])
-			symbola(p, TERMSYM_UARROW);
-		else if ('d' == word[0] && 'a' == word[1])
-			symbola(p, TERMSYM_DARROW);
-		else if ('a' == word[0] && 'a' == word[1])
-			symbola(p, TERMSYM_ACUTE);
-		else if ('g' == word[0] && 'a' == word[1])
-			symbola(p, TERMSYM_GRAVE);
-		else if ('!' == word[0] && '=' == word[1])
-			symbola(p, TERMSYM_NEQ);
-		else if ('i' == word[0] && 'f' == word[1])
-			symbola(p, TERMSYM_INF);
-		else if ('n' == word[0] && 'a' == word[1])
-			symbola(p, TERMSYM_NAN);
-		else if ('b' == word[0] && 'a' == word[1])
-			symbola(p, TERMSYM_BAR);
-
-		/* Deprecated forms. */
-		else if ('A' == word[0] && 'm' == word[1])
-			symbola(p, TERMSYM_AMP);
-		else if ('B' == word[0] && 'a' == word[1])
-			symbola(p, TERMSYM_BAR);
-		else if ('I' == word[0] && 'f' == word[1])
-			symbola(p, TERMSYM_INF2);
-		else if ('G' == word[0] && 'e' == word[1])
-			symbola(p, TERMSYM_GE);
-		else if ('G' == word[0] && 't' == word[1])
-			symbola(p, TERMSYM_GT);
-		else if ('L' == word[0] && 'e' == word[1])
-			symbola(p, TERMSYM_LE);
-		else if ('L' == word[0] && 'q' == word[1])
-			symbola(p, TERMSYM_LDQUOTE);
-		else if ('L' == word[0] && 't' == word[1])
-			symbola(p, TERMSYM_LT);
-		else if ('N' == word[0] && 'a' == word[1])
-			symbola(p, TERMSYM_NAN);
-		else if ('N' == word[0] && 'e' == word[1])
-			symbola(p, TERMSYM_NEQ);
-		else if ('P' == word[0] && 'i' == word[1])
-			symbola(p, TERMSYM_PI);
-		else if ('P' == word[0] && 'm' == word[1])
-			symbola(p, TERMSYM_PLUSMINUS);
-		else if ('R' == word[0] && 'q' == word[1])
-			symbola(p, TERMSYM_RDQUOTE);
-		else
-			warnx("escape sequence not supported: %c%c",
-					word[0], word[1]);
+		enc = termenc2;
 		break;
-
 	default:
-		warnx("escape sequence not supported");
-		break;
+		warnx("unsupported %zu-byte escape sequence", len);
+		return;
 	}
-}
 
+	for ( ; enc->enc; enc++) 
+		if (0 == memcmp(enc->enc, word, len)) {
+			symbola(p, enc->sym);
+			return;
+		}
 
-/*
- * Apply a style to the output buffer.  This is looked up by means of
- * the styletab.
- */
-static void
-style(struct termp *p, enum tstyle esc)
-{
-
-	if (p->col + 4 >= p->maxcols)
-		errx(1, "line overrun");
-
-	p->buf[(p->col)++] = 27;
-	p->buf[(p->col)++] = '[';
-	switch (esc) {
-	case (TERMSTYLE_CLEAR):
-		p->buf[(p->col)++] = '0';
-		break;
-	case (TERMSTYLE_BOLD):
-		p->buf[(p->col)++] = '1';
-		break;
-	case (TERMSTYLE_UNDER):
-		p->buf[(p->col)++] = '4';
-		break;
-	default:
-		abort();
-		/* NOTREACHED */
-	}
-	p->buf[(p->col)++] = 'm';
+	warnx("unsupported %zu-byte escape sequence", len);
 }
 
 
@@ -770,24 +681,34 @@ pescape(struct termp *p, const char *word, size_t *i, 
 {
 	size_t		 j;
 
-	(*i)++;
-	assert(*i < len);
+	if (++(*i) >= len) {
+		warnx("ignoring bad escape sequence");
+		return;
+	}
 
 	if ('(' == word[*i]) {
 		(*i)++;
-		assert(*i + 1 < len);
+		if (*i + 1 >= len) {
+			warnx("ignoring bad escape sequence");
+			return;
+		}
 		nescape(p, &word[*i], 2);
 		(*i)++;
 		return;
 
 	} else if ('*' == word[*i]) { 
-		/* XXX - deprecated! */
 		(*i)++;
-		assert(*i < len);
+		if (*i >= len) {
+			warnx("ignoring bad escape sequence");
+			return;
+		}
 		switch (word[*i]) {
 		case ('('):
 			(*i)++;
-			assert(*i + 1 < len);
+			if (*i + 1 >= len) {
+				warnx("ignoring bad escape sequence");
+				return;
+			}
 			nescape(p, &word[*i], 2);
 			(*i)++;
 			return;
@@ -807,7 +728,10 @@ pescape(struct termp *p, const char *word, size_t *i, 
 	for (j = 0; word[*i] && ']' != word[*i]; (*i)++, j++)
 		/* Loop... */ ;
 
-	assert(word[*i]);
+	if (0 == word[*i]) {
+		warnx("ignoring bad escape sequence");
+		return;
+	}
 	nescape(p, &word[*i - j], j);
 }
 
@@ -835,9 +759,9 @@ pword(struct termp *p, const char *word, size_t len)
 	 */
 
 	if (p->flags & TERMP_BOLD)
-		style(p, TERMSTYLE_BOLD);
+		stylea(p, TERMSTYLE_BOLD);
 	if (p->flags & TERMP_UNDERLINE)
-		style(p, TERMSTYLE_UNDER);
+		stylea(p, TERMSTYLE_UNDER);
 
 	for (i = 0; i < len; i++) {
 		if ('\\' == word[i]) {
@@ -849,7 +773,7 @@ pword(struct termp *p, const char *word, size_t len)
 
 	if (p->flags & TERMP_BOLD ||
 			p->flags & TERMP_UNDERLINE)
-		style(p, TERMSTYLE_CLEAR);
+		stylea(p, TERMSTYLE_CLEAR);
 }
 
 
@@ -866,6 +790,18 @@ symbola(struct termp *p, enum tsym sym)
 
 
 /*
+ * Add a style to the output line buffer.
+ */
+static void
+stylea(struct termp *p, enum tstyle style)
+{
+
+	assert(p->styletab[style].sym);
+	stringa(p, p->styletab[style].sym, p->styletab[style].sz);
+}
+
+
+/*
  * Like chara() but for arbitrary-length buffers.  Resize the buffer by
  * a factor of two (if the buffer is less than that) or the buffer's
  * size.
@@ -874,6 +810,9 @@ static void
 stringa(struct termp *p, const char *c, size_t sz)
 {
 	size_t		 s;
+
+	if (0 == sz)
+		return;
 
 	s = sz > p->maxcols * 2 ? sz : p->maxcols * 2;