===================================================================
RCS file: /cvs/mandoc/term.c,v
retrieving revision 1.283
retrieving revision 1.289
diff -u -p -r1.283 -r1.289
--- mandoc/term.c	2021/08/10 12:55:04	1.283
+++ mandoc/term.c	2022/08/15 18:12:30	1.289
@@ -1,7 +1,7 @@
-/* $Id: term.c,v 1.283 2021/08/10 12:55:04 schwarze Exp $ */
+/* $Id: term.c,v 1.289 2022/08/15 18:12:30 schwarze Exp $ */
 /*
+ * Copyright (c) 2010-2022 Ingo Schwarze <schwarze@openbsd.org>
  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2010-2020 Ingo Schwarze <schwarze@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -58,6 +58,7 @@ term_setcol(struct termp *p, size_t maxtcol)
 void
 term_free(struct termp *p)
 {
+	term_tab_free();
 	for (p->tcol = p->tcols; p->tcol < p->tcols + p->maxtcol; p->tcol++)
 		free(p->tcol->buf);
 	free(p->tcols);
@@ -156,6 +157,7 @@ term_flushln(struct termp *p)
 		/* Finally, print the field content. */
 
 		term_field(p, vbl, nbr);
+		p->tcol->taboff += vbr + (*p->width)(p, ' ');
 
 		/*
 		 * If there is no text left in the field, exit the loop.
@@ -206,7 +208,6 @@ term_flushln(struct termp *p)
 			return;
 
 		endline(p);
-		p->viscol = 0;
 
 		/*
 		 * Normally, start the next line at the same indentation
@@ -267,22 +268,11 @@ term_fill(struct termp *p, size_t *nbr, size_t *vbr, s
 			vis -= (*p->width)(p, p->tcol->buf[ic - 1]);
 			continue;
 
-		case '\t':  /* Normal ASCII whitespace. */
 		case ' ':
 		case ASCII_BREAK:  /* Escape \: (breakpoint). */
-			switch (p->tcol->buf[ic]) {
-			case '\t':
-				vn = term_tab_next(vis);
-				break;
-			case ' ':
-				vn = vis + (*p->width)(p, ' ');
-				break;
-			case ASCII_BREAK:
-				vn = vis;
-				break;
-			default:
-				abort();
-			}
+			vn = vis;
+			if (p->tcol->buf[ic] == ' ')
+				vn += (*p->width)(p, ' ');
 			/* Can break at the end of a word. */
 			if (breakline || vn > vtarget)
 				break;
@@ -316,12 +306,23 @@ term_fill(struct termp *p, size_t *nbr, size_t *vbr, s
 			*vbr = vis;
 			continue;
 
-		case ASCII_NBRSP:  /* Non-breakable space. */
-			p->tcol->buf[ic] = ' ';
-			/* FALLTHROUGH */
-		default:  /* Printable character. */
+		default:
+			switch (p->tcol->buf[ic]) {
+			case '\t':
+				vis += p->tcol->taboff;
+				vis = term_tab_next(vis);
+				vis -= p->tcol->taboff;
+				break;
+			case ASCII_NBRZW:  /* Non-breakable zero-width. */
+				break;
+			case ASCII_NBRSP:  /* Non-breakable space. */
+				p->tcol->buf[ic] = ' ';
+				/* FALLTHROUGH */
+			default:  /* Printable character. */
+				vis += (*p->width)(p, p->tcol->buf[ic]);
+				break;
+			}
 			graph = 1;
-			vis += (*p->width)(p, p->tcol->buf[ic]);
 			if (vis > vtarget && *nbr > 0)
 				return;
 			continue;
@@ -351,8 +352,8 @@ term_field(struct termp *p, size_t vbl, size_t nbr)
 {
 	size_t	 ic;	/* Character position in the input buffer. */
 	size_t	 vis;	/* Visual position of the current character. */
+	size_t	 vt;	/* Visual position including tab offset. */
 	size_t	 dv;	/* Visual width of the current character. */
-	size_t	 vn;	/* Visual position of the next character. */
 
 	vis = 0;
 	for (ic = p->tcol->col; ic < nbr; ic++) {
@@ -365,15 +366,16 @@ term_field(struct termp *p, size_t vbl, size_t nbr)
 		switch (p->tcol->buf[ic]) {
 		case '\n':
 		case ASCII_BREAK:
+		case ASCII_NBRZW:
 			continue;
 		case '\t':
-			vn = term_tab_next(vis);
-			vbl += vn - vis;
-			vis = vn;
-			continue;
 		case ' ':
 		case ASCII_NBRSP:
-			dv = (*p->width)(p, ' ');
+			if (p->tcol->buf[ic] == '\t') {
+				vt = p->tcol->taboff + vis;
+				dv = term_tab_next(vt) - vt;
+			} else
+				dv = (*p->width)(p, ' ');
 			vbl += dv;
 			vis += dv;
 			continue;
@@ -435,7 +437,7 @@ endline(struct termp *p)
 void
 term_newln(struct termp *p)
 {
-
+	p->tcol->taboff = 0;
 	p->flags |= TERMP_NOSPACE;
 	if (p->tcol->lastcol || p->viscol)
 		term_flushln(p);
@@ -571,18 +573,23 @@ term_word(struct termp *p, const char *word)
 			break;
 		case ESCAPE_NUMBERED:
 			uc = mchars_num2char(seq, sz);
-			if (uc < 0)
-				continue;
-			break;
+			if (uc >= 0)
+				break;
+			bufferc(p, ASCII_NBRZW);
+			continue;
 		case ESCAPE_SPECIAL:
 			if (p->enc == TERMENC_ASCII) {
 				cp = mchars_spec2str(seq, sz, &ssz);
 				if (cp != NULL)
 					encode(p, cp, ssz);
+				else
+					bufferc(p, ASCII_NBRZW);
 			} else {
 				uc = mchars_spec2cp(seq, sz);
 				if (uc > 0)
 					encode1(p, uc);
+				else
+					bufferc(p, ASCII_NBRZW);
 			}
 			continue;
 		case ESCAPE_UNDEF:
@@ -627,6 +634,10 @@ term_word(struct termp *p, const char *word)
 				encode(p, "utf8", 4);
 			continue;
 		case ESCAPE_HORIZ:
+			if (p->flags & TERMP_BACKAFTER) {
+				p->flags &= ~TERMP_BACKAFTER;
+				continue;
+			}
 			if (*seq == '|') {
 				seq++;
 				uc = -p->col;
@@ -635,12 +646,24 @@ term_word(struct termp *p, const char *word)
 			if (a2roffsu(seq, &su, SCALE_EM) == NULL)
 				continue;
 			uc += term_hen(p, &su);
-			if (uc > 0)
-				while (uc-- > 0)
-					bufferc(p, ASCII_NBRSP);
-			else if (p->col > (size_t)(-uc))
+			if (uc >= 0) {
+				while (uc > 0) {
+					uc -= term_len(p, 1);
+					if (p->flags & TERMP_BACKBEFORE)
+						p->flags &= ~TERMP_BACKBEFORE;
+					else
+						bufferc(p, ASCII_NBRSP);
+				}
+				continue;
+			}
+			if (p->flags & TERMP_BACKBEFORE) {
+				p->flags &= ~TERMP_BACKBEFORE;
+				assert(p->col > 0);
+				p->col--;
+			}
+			if (p->col >= (size_t)(-uc)) {
 				p->col += uc;
-			else {
+			} else {
 				uc += p->col;
 				p->col = 0;
 				if (p->tcol->offset > (size_t)(-uc)) {
@@ -728,6 +751,9 @@ term_word(struct termp *p, const char *word)
 			if (p->col > p->tcol->lastcol)
 				p->col = p->tcol->lastcol;
 			continue;
+		case ESCAPE_IGNORE:
+			bufferc(p, ASCII_NBRZW);
+			continue;
 		default:
 			continue;
 		}
@@ -919,8 +945,8 @@ term_strlen(const struct termp *p, const char *cp)
 	int		 ssz, skip, uc;
 	const char	*seq, *rhs;
 	enum mandoc_esc	 esc;
-	static const char rej[] = { '\\', ASCII_NBRSP, ASCII_HYPH,
-			ASCII_BREAK, '\0' };
+	static const char rej[] = { '\\', ASCII_NBRSP, ASCII_NBRZW,
+		ASCII_BREAK, ASCII_HYPH, '\0' };
 
 	/*
 	 * Account for escaped sequences within string length