===================================================================
RCS file: /cvs/mandoc/term.c,v
retrieving revision 1.191
retrieving revision 1.195
diff -u -p -r1.191 -r1.195
--- mandoc/term.c	2011/05/15 22:29:50	1.191
+++ mandoc/term.c	2011/05/18 23:59:08	1.195
@@ -1,4 +1,4 @@
-/*	$Id: term.c,v 1.191 2011/05/15 22:29:50 kristaps Exp $ */
+/*	$Id: term.c,v 1.195 2011/05/18 23:59:08 kristaps Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -36,6 +36,7 @@
 static	void		 adjbuf(struct termp *p, int);
 static	void		 bufferc(struct termp *, char);
 static	void		 encode(struct termp *, const char *, size_t);
+static	void		 encode1(struct termp *, int);
 
 void
 term_free(struct termp *p)
@@ -69,18 +70,6 @@ term_end(struct termp *p)
 	(*p->end)(p);
 }
 
-
-struct termp *
-term_alloc(enum termenc enc)
-{
-	struct termp	*p;
-
-	p = mandoc_calloc(1, sizeof(struct termp));
-	p->enc = enc;
-	return(p);
-}
-
-
 /*
  * Flush a line of text.  A "line" is loosely defined as being something
  * that should be followed by a newline, regardless of whether it's
@@ -415,7 +404,7 @@ term_word(struct termp *p, const char *word)
 {
 	const char	*seq, *cp;
 	char		 c;
-	int		 sz;
+	int		 sz, uc;
 	size_t		 ssz;
 	enum mandoc_esc	 esc;
 
@@ -451,6 +440,15 @@ term_word(struct termp *p, const char *word)
 			break;
 
 		switch (esc) {
+		case (ESCAPE_UNICODE):
+			if (TERMENC_ASCII == p->enc) {
+				encode1(p, '?');
+				break;
+			}
+			uc = mchars_num2uc(seq + 1, sz - 1);
+			if ('\0' != uc)
+				encode1(p, uc);
+			break;
 		case (ESCAPE_NUMBERED):
 			if ('\0' != (c = mchars_num2char(seq, sz)))
 				encode(p, &c, 1);
@@ -473,6 +471,8 @@ term_word(struct termp *p, const char *word)
 		case (ESCAPE_FONTITALIC):
 			term_fontrepl(p, TERMFONT_UNDER);
 			break;
+		case (ESCAPE_FONT):
+			/* FALLTHROUGH */
 		case (ESCAPE_FONTROMAN):
 			term_fontrepl(p, TERMFONT_NONE);
 			break;
@@ -512,7 +512,34 @@ bufferc(struct termp *p, char c)
 	p->buf[p->col++] = c;
 }
 
+/*
+ * Single-value counterpart of encode(): append one (probably unicode)
+ * value to the output buffer, decorated according to the current font.
+ * Unlike encode(), no test is made for non-decorated glyphs.
+ */
 static void
+encode1(struct termp *p, int c)
+{
+	enum termfont	  font;
+
+	/* Grow the buffer first; at most three cells are stored below. */
+	if (p->col + 4 >= p->maxcols)
+		adjbuf(p, p->col + 4);
+
+	font = term_fonttop(p);
+
+	if (TERMFONT_NONE != font) {
+		/*
+		 * Overstrike prefix: '_' for underline, else the glyph.
+		 */
+		p->buf[p->col++] = TERMFONT_UNDER == font ? '_' : c;
+		p->buf[p->col++] = 8;
+	}
+
+	p->buf[p->col++] = c;
+}
+
+static void
 encode(struct termp *p, const char *word, size_t sz)
 {
 	enum termfont	  f;
@@ -584,6 +611,7 @@ term_strlen(const struct termp *p, const char *cp)
 		for (i = 0; i < rsz; i++)
 			sz += (*p->width)(p, *cp++);
 
+		c = 0;
 		switch (*cp) {
 		case ('\\'):
 			cp++;
@@ -591,6 +619,15 @@ term_strlen(const struct termp *p, const char *cp)
 			switch (mandoc_escape(&cp, &seq, &ssz)) {
 			case (ESCAPE_ERROR):
 				return(sz);
+			case (ESCAPE_UNICODE):
+				if (TERMENC_ASCII == p->enc) { /* cf. term_word() */
+					sz += (*p->width)(p, '?'); /* printed as '?' */
+					break;
+				}
+				c = mchars_num2uc(seq + 1, ssz - 1);
+				if ('\0' != c)
+					sz += (*p->width)(p, c);
+				break;
 			case (ESCAPE_NUMBERED):
 				c = mchars_num2char(seq, ssz);
 				if ('\0' != c)