===================================================================
RCS file: /cvs/mandoc/term.c,v
retrieving revision 1.191
retrieving revision 1.199
diff -u -p -r1.191 -r1.199
--- mandoc/term.c	2011/05/15 22:29:50	1.191
+++ mandoc/term.c	2011/09/18 21:18:19	1.199
@@ -1,6 +1,6 @@
-/*	$Id: term.c,v 1.191 2011/05/15 22:29:50 kristaps Exp $ */
+/*	$Id: term.c,v 1.199 2011/09/18 21:18:19 schwarze Exp $ */
 /*
- * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
@@ -36,6 +36,7 @@
 static	void		 adjbuf(struct termp *p, int);
 static	void		 bufferc(struct termp *, char);
 static	void		 encode(struct termp *, const char *, size_t);
+static	void		 encode1(struct termp *, int);
 
 void
 term_free(struct termp *p)
@@ -69,18 +70,6 @@ term_end(struct termp *p)
 	(*p->end)(p);
 }
 
-
-struct termp *
-term_alloc(enum termenc enc)
-{
-	struct termp	*p;
-
-	p = mandoc_calloc(1, sizeof(struct termp));
-	p->enc = enc;
-	return(p);
-}
-
-
 /*
  * Flush a line of text.  A "line" is loosely defined as being something
  * that should be followed by a newline, regardless of whether it's
@@ -415,7 +404,7 @@ term_word(struct termp *p, const char *word)
 {
 	const char	*seq, *cp;
 	char		 c;
-	int		 sz;
+	int		 sz, uc;
 	size_t		 ssz;
 	enum mandoc_esc	 esc;
 
@@ -450,16 +439,33 @@ term_word(struct termp *p, const char *word)
 		if (ESCAPE_ERROR == esc)
 			break;
 
+		if (TERMENC_ASCII != p->enc)
+			switch (esc) {
+			case (ESCAPE_UNICODE):
+				uc = mchars_num2uc(seq + 1, sz - 1);
+				if ('\0' == uc)
+					break;
+				encode1(p, uc);
+				continue;
+			case (ESCAPE_SPECIAL):
+				uc = mchars_spec2cp(p->symtab, seq, sz);
+				if (uc <= 0)
+					break;
+				encode1(p, uc);
+				continue;
+			default:
+				break;
+			}
+
 		switch (esc) {
+		case (ESCAPE_UNICODE):
+			encode1(p, '?');
+			break;
 		case (ESCAPE_NUMBERED):
-			if ('\0' != (c = mchars_num2char(seq, sz)))
+			c = mchars_num2char(seq, sz);
+			if ('\0' != c)
 				encode(p, &c, 1);
 			break;
-		case (ESCAPE_PREDEF):
-			cp = mchars_res2str(p->symtab, seq, sz, &ssz);
-			if (NULL != cp)
-				encode(p, cp, ssz);
-			break;
 		case (ESCAPE_SPECIAL):
 			cp = mchars_spec2str(p->symtab, seq, sz, &ssz);
 			if (NULL != cp) 
@@ -473,6 +479,8 @@ term_word(struct termp *p, const char *word)
 		case (ESCAPE_FONTITALIC):
 			term_fontrepl(p, TERMFONT_UNDER);
 			break;
+		case (ESCAPE_FONT):
+			/* FALLTHROUGH */
 		case (ESCAPE_FONTROMAN):
 			term_fontrepl(p, TERMFONT_NONE);
 			break;
@@ -512,7 +520,34 @@ bufferc(struct termp *p, char c)
 	p->buf[p->col++] = c;
 }
 
+/*
+ * See encode().  Do this for a single (probably unicode) value c:
+ * TERMFONT_NONE emits c alone, TERMFONT_UNDER emits '_', backspace, c,
+ * any other font emits c, backspace, c.
+ * Does not check for non-decorated glyphs.
+ */
 static void
+encode1(struct termp *p, int c)
+{
+	enum termfont	  f;
+
+	if (p->col + 4 >= p->maxcols)	/* at most three cells are written below */
+		adjbuf(p, p->col + 4);
+
+	f = term_fonttop(p);
+
+	if (TERMFONT_NONE == f) {	/* undecorated: the value itself, nothing else */
+		p->buf[p->col++] = c;
+		return;
+	} else if (TERMFONT_UNDER == f) {	/* underline: '_' is overstruck by c */
+		p->buf[p->col++] = '_';
+	} else	/* any other font: c is overstruck by itself */
+		p->buf[p->col++] = c;
+
+	p->buf[p->col++] = 8;	/* ASCII backspace */
+	p->buf[p->col++] = c;
+}
+
+static void
 encode(struct termp *p, const char *word, size_t sz)
 {
 	enum termfont	  f;
@@ -541,13 +576,16 @@ encode(struct termp *p, const char *word, size_t sz)
 		adjbuf(p, p->col + 1 + (len * 3));
 
 	for (i = 0; i < len; i++) {
-		if ( ! isgraph((unsigned char)word[i])) {
+		if (ASCII_HYPH != word[i] &&
+		    ! isgraph((unsigned char)word[i])) {
 			p->buf[p->col++] = word[i];
 			continue;
 		}
 
 		if (TERMFONT_UNDER == f)
 			p->buf[p->col++] = '_';
+		else if (ASCII_HYPH == word[i])
+			p->buf[p->col++] = '-';
 		else
 			p->buf[p->col++] = word[i];
 
@@ -570,6 +608,7 @@ term_strlen(const struct termp *p, const char *cp)
 	size_t		 sz, rsz, i;
 	int		 ssz, c;
 	const char	*seq, *rhs;
+	enum mandoc_esc	 esc;
 	static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' };
 
 	/*
@@ -584,21 +623,44 @@ term_strlen(const struct termp *p, const char *cp)
 		for (i = 0; i < rsz; i++)
 			sz += (*p->width)(p, *cp++);
 
+		c = 0;
 		switch (*cp) {
 		case ('\\'):
 			cp++;
-			rhs = NULL;
-			switch (mandoc_escape(&cp, &seq, &ssz)) {
-			case (ESCAPE_ERROR):
+			esc = mandoc_escape(&cp, &seq, &ssz);
+			if (ESCAPE_ERROR == esc)
 				return(sz);
+
+			if (TERMENC_ASCII != p->enc)
+				switch (esc) {
+				case (ESCAPE_UNICODE):
+					c = mchars_num2uc
+						(seq + 1, ssz - 1);
+					if ('\0' == c)
+						break;
+					sz += (*p->width)(p, c);
+					continue;
+				case (ESCAPE_SPECIAL):
+					c = mchars_spec2cp
+						(p->symtab, seq, ssz);
+					if (c <= 0)
+						break;
+					sz += (*p->width)(p, c);
+					continue;
+				default:
+					break;
+				}
+
+			rhs = NULL;
+
+			switch (esc) {
+			case (ESCAPE_UNICODE):
+				sz += (*p->width)(p, '?');
+				break;
 			case (ESCAPE_NUMBERED):
 				c = mchars_num2char(seq, ssz);
 				if ('\0' != c)
 					sz += (*p->width)(p, c);
-				break;
-			case (ESCAPE_PREDEF):
-				rhs = mchars_res2str
-					(p->symtab, seq, ssz, &rsz);
 				break;
 			case (ESCAPE_SPECIAL):
 				rhs = mchars_spec2str