=================================================================== RCS file: /cvs/mandoc/term.c,v retrieving revision 1.283 retrieving revision 1.289 diff -u -p -r1.283 -r1.289 --- mandoc/term.c 2021/08/10 12:55:04 1.283 +++ mandoc/term.c 2022/08/15 18:12:30 1.289 @@ -1,7 +1,7 @@ -/* $Id: term.c,v 1.283 2021/08/10 12:55:04 schwarze Exp $ */ +/* $Id: term.c,v 1.289 2022/08/15 18:12:30 schwarze Exp $ */ /* + * Copyright (c) 2010-2022 Ingo Schwarze * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons - * Copyright (c) 2010-2020 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -58,6 +58,7 @@ term_setcol(struct termp *p, size_t maxtcol) void term_free(struct termp *p) { + term_tab_free(); for (p->tcol = p->tcols; p->tcol < p->tcols + p->maxtcol; p->tcol++) free(p->tcol->buf); free(p->tcols); @@ -156,6 +157,7 @@ term_flushln(struct termp *p) /* Finally, print the field content. */ term_field(p, vbl, nbr); + p->tcol->taboff += vbr + (*p->width)(p, ' '); /* * If there is no text left in the field, exit the loop. @@ -206,7 +208,6 @@ term_flushln(struct termp *p) return; endline(p); - p->viscol = 0; /* * Normally, start the next line at the same indentation @@ -267,22 +268,11 @@ term_fill(struct termp *p, size_t *nbr, size_t *vbr, s vis -= (*p->width)(p, p->tcol->buf[ic - 1]); continue; - case '\t': /* Normal ASCII whitespace. */ case ' ': case ASCII_BREAK: /* Escape \: (breakpoint). */ - switch (p->tcol->buf[ic]) { - case '\t': - vn = term_tab_next(vis); - break; - case ' ': - vn = vis + (*p->width)(p, ' '); - break; - case ASCII_BREAK: - vn = vis; - break; - default: - abort(); - } + vn = vis; + if (p->tcol->buf[ic] == ' ') + vn += (*p->width)(p, ' '); /* Can break at the end of a word. */ if (breakline || vn > vtarget) break; @@ -316,12 +306,23 @@ term_fill(struct termp *p, size_t *nbr, size_t *vbr, s *vbr = vis; continue; - case ASCII_NBRSP: /* Non-breakable space. */ - p->tcol->buf[ic] = ' '; - /* FALLTHROUGH */ - default: /* Printable character. */ + default: + switch (p->tcol->buf[ic]) { + case '\t': + vis += p->tcol->taboff; + vis = term_tab_next(vis); + vis -= p->tcol->taboff; + break; + case ASCII_NBRZW: /* Non-breakable zero-width. */ + break; + case ASCII_NBRSP: /* Non-breakable space. */ + p->tcol->buf[ic] = ' '; + /* FALLTHROUGH */ + default: /* Printable character. */ + vis += (*p->width)(p, p->tcol->buf[ic]); + break; + } graph = 1; - vis += (*p->width)(p, p->tcol->buf[ic]); if (vis > vtarget && *nbr > 0) return; continue; @@ -351,8 +352,8 @@ term_field(struct termp *p, size_t vbl, size_t nbr) { size_t ic; /* Character position in the input buffer. */ size_t vis; /* Visual position of the current character. */ + size_t vt; /* Visual position including tab offset. */ size_t dv; /* Visual width of the current character. */ - size_t vn; /* Visual position of the next character. */ vis = 0; for (ic = p->tcol->col; ic < nbr; ic++) { @@ -365,15 +366,16 @@ term_field(struct termp *p, size_t vbl, size_t nbr) switch (p->tcol->buf[ic]) { case '\n': case ASCII_BREAK: + case ASCII_NBRZW: continue; case '\t': - vn = term_tab_next(vis); - vbl += vn - vis; - vis = vn; - continue; case ' ': case ASCII_NBRSP: - dv = (*p->width)(p, ' '); + if (p->tcol->buf[ic] == '\t') { + vt = p->tcol->taboff + vis; + dv = term_tab_next(vt) - vt; + } else + dv = (*p->width)(p, ' '); vbl += dv; vis += dv; continue; @@ -435,7 +437,7 @@ endline(struct termp *p) void term_newln(struct termp *p) { - + p->tcol->taboff = 0; p->flags |= TERMP_NOSPACE; if (p->tcol->lastcol || p->viscol) term_flushln(p); @@ -571,18 +573,23 @@ term_word(struct termp *p, const char *word) break; case ESCAPE_NUMBERED: uc = mchars_num2char(seq, sz); - if (uc < 0) - continue; - break; + if (uc >= 0) + break; + bufferc(p, ASCII_NBRZW); + continue; case ESCAPE_SPECIAL: if (p->enc == TERMENC_ASCII) { cp = mchars_spec2str(seq, sz, &ssz); if (cp != NULL) encode(p, cp, ssz); + else + bufferc(p, ASCII_NBRZW); } else { uc = mchars_spec2cp(seq, sz); if (uc > 0) encode1(p, uc); + else + bufferc(p, ASCII_NBRZW); } continue; case ESCAPE_UNDEF: @@ -627,6 +634,10 @@ term_word(struct termp *p, const char *word) encode(p, "utf8", 4); continue; case ESCAPE_HORIZ: + if (p->flags & TERMP_BACKAFTER) { + p->flags &= ~TERMP_BACKAFTER; + continue; + } if (*seq == '|') { seq++; uc = -p->col; @@ -635,12 +646,24 @@ term_word(struct termp *p, const char *word) if (a2roffsu(seq, &su, SCALE_EM) == NULL) continue; uc += term_hen(p, &su); - if (uc > 0) - while (uc-- > 0) - bufferc(p, ASCII_NBRSP); - else if (p->col > (size_t)(-uc)) + if (uc >= 0) { + while (uc > 0) { + uc -= term_len(p, 1); + if (p->flags & TERMP_BACKBEFORE) + p->flags &= ~TERMP_BACKBEFORE; + else + bufferc(p, ASCII_NBRSP); + } + continue; + } + if (p->flags & TERMP_BACKBEFORE) { + p->flags &= ~TERMP_BACKBEFORE; + assert(p->col > 0); + p->col--; + } + if (p->col >= (size_t)(-uc)) { p->col += uc; - else { + } else { uc += p->col; p->col = 0; if (p->tcol->offset > (size_t)(-uc)) { @@ -728,6 +751,9 @@ term_word(struct termp *p, const char *word) if (p->col > p->tcol->lastcol) p->col = p->tcol->lastcol; continue; + case ESCAPE_IGNORE: + bufferc(p, ASCII_NBRZW); + continue; default: continue; } @@ -919,8 +945,8 @@ term_strlen(const struct termp *p, const char *cp) int ssz, skip, uc; const char *seq, *rhs; enum mandoc_esc esc; - static const char rej[] = { '\\', ASCII_NBRSP, ASCII_HYPH, - ASCII_BREAK, '\0' }; + static const char rej[] = { '\\', ASCII_NBRSP, ASCII_NBRZW, + ASCII_BREAK, ASCII_HYPH, '\0' }; /* * Account for escaped sequences within string length