mandoc/term.c - diff

Return to term.c CVS log

Up to [cvsweb.bsd.lv] / mandoc

Diff for /mandoc/term.c between version 1.89 and 1.258

-version 1.89, 2009/07/16 13:17:51
+version 1.258, 2016/08/10 11:03:43
-Line 1
+Line 1
  /*      $Id$ */
  /*
-  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
+  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+  * Copyright (c) 2010-2016 Ingo Schwarze <schwarze@openbsd.org>
   *
   * Permission to use, copy, modify, and distribute this software for any
   * purpose with or without fee is hereby granted, provided that the above
   * copyright notice and this permission notice appear in all copies.
   *
-  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
   * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
   * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
   * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
   * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
   * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
   */
+ #include "config.h"
+ #include <sys/types.h>
  #include <assert.h>
- #include <err.h>
+ #include <ctype.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
+ #include "mandoc.h"
+ #include "mandoc_aux.h"
+ #include "out.h"
  #include "term.h"
- #include "man.h"
+ #include "main.h"
- #include "mdoc.h"
- extern  int               man_run(struct termp *,
+ static  size_t           cond_width(const struct termp *, int, int *);
-                                 const struct man *);
+ static  void             adjbuf(struct termp *p, size_t);
- extern  int               mdoc_run(struct termp *,
+ static  void             bufferc(struct termp *, char);
-                                 const struct mdoc *);
+ static  void             encode(struct termp *, const char *, size_t);
+ static  void             encode1(struct termp *, int);
- static  struct termp     *term_alloc(enum termenc);
- static  void              term_free(struct termp *);
- static  void              term_pescape(struct termp *, const char **);
- static  void              term_nescape(struct termp *,
-                                 const char *, size_t);
- static  void              term_chara(struct termp *, char);
- static  void              term_encodea(struct termp *, char);
- static  int               term_isopendelim(const char *);
- static  int               term_isclosedelim(const char *);
- void *
- ascii_alloc(void)
- {
-         return(term_alloc(TERMENC_ASCII));
- }
- int
- terminal_man(void *arg, const struct man *man)
- {
-         struct termp    *p;
-         p = (struct termp *)arg;
-         if (NULL == p->symtab)
-                 p->symtab = term_ascii2htab();
-         return(man_run(p, man));
- }
- int
- terminal_mdoc(void *arg, const struct mdoc *mdoc)
- {
-         struct termp    *p;
-         p = (struct termp *)arg;
-         if (NULL == p->symtab)
-                 p->symtab = term_ascii2htab();
-         return(mdoc_run(p, mdoc));
- }
  void
- terminal_free(void *arg)
- {
-         term_free((struct termp *)arg);
- }
- static void
  term_free(struct termp *p)
  {
-         if (p->buf)
+         free(p->buf);
-                 free(p->buf);
+         free(p->fontq);
-         if (TERMENC_ASCII == p->enc && p->symtab)
-                 term_asciifree(p->symtab);
          free(p);
  }
+ void
- static struct termp *
+ term_begin(struct termp *p, term_margin head,
- term_alloc(enum termenc enc)
+                 term_margin foot, const struct roff_meta *arg)
  {
-         struct termp *p;
-         if (NULL == (p = malloc(sizeof(struct termp))))
+         p->headf = head;
-                 err(1, "malloc");
+         p->footf = foot;
-         bzero(p, sizeof(struct termp));
+         p->argf = arg;
-         p->maxrmargin = 78;
+         (*p->begin)(p);
-         p->enc = enc;
-         return(p);
  }
+ void
- static int
+ term_end(struct termp *p)
- term_isclosedelim(const char *p)
  {
-         if ( ! (*p && 0 == *(p + 1)))
+         (*p->end)(p);
-                 return(0);
-         switch (*p) {
-         case('.'):
-                 /* FALLTHROUGH */
-         case(','):
-                 /* FALLTHROUGH */
-         case(';'):
-                 /* FALLTHROUGH */
-         case(':'):
-                 /* FALLTHROUGH */
-         case('?'):
-                 /* FALLTHROUGH */
-         case('!'):
-                 /* FALLTHROUGH */
-         case(')'):
-                 /* FALLTHROUGH */
-         case(']'):
-                 /* FALLTHROUGH */
-         case('}'):
-                 return(1);
-         default:
-                 break;
-         }
-         return(0);
  }
- static int
- term_isopendelim(const char *p)
- {
-         if ( ! (*p && 0 == *(p + 1)))
-                 return(0);
-         switch (*p) {
-         case('('):
-                 /* FALLTHROUGH */
-         case('['):
-                 /* FALLTHROUGH */
-         case('{'):
-                 return(1);
-         default:
-                 break;
-         }
-         return(0);
- }
  /*
-  * Flush a line of text.  A "line" is loosely defined as being something
+  * Flush a chunk of text.  By default, break the output line each time
-  * that should be followed by a newline, regardless of whether it's
+  * the right margin is reached, and continue output on the next line
-  * broken apart by newlines getting there.  A line can also be a
+  * at the same offset as the chunk itself.  By default, also break the
-  * fragment of a columnar list.
+  * output line at the end of the chunk.
+  * The following flags may be specified:
   *
-  * Specifically, a line is whatever's in p->buf of length p->col, which
+  *  - TERMP_NOBREAK: Do not break the output line at the right margin,
-  * is zeroed after this function returns.
+  *    but only at the max right margin.  Also, do not break the output
-  *
+  *    line at the end of the chunk, such that the next call can pad to
-  * The usage of termp:flags is as follows:
+  *    the next column.  However, if less than p->trailspace blanks,
-  *
+  *    which can be 0, 1, or 2, remain to the right margin, the line
-  *  - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
+  *    will be broken.
-  *    offset value.  This is useful when doing columnar lists where the
+  *  - TERMP_BRTRSP: Consider trailing whitespace significant
-  *    prior column has right-padded.
+  *    when deciding whether the chunk fits or not.
-  *
+  *  - TERMP_BRIND: If the chunk does not fit and the output line has
-  *  - TERMP_NOBREAK: this is the most important and is used when making
+  *    to be broken, start the next line at the right margin instead
-  *    columns.  In short: don't print a newline and instead pad to the
+  *    of at the offset.  Used together with TERMP_NOBREAK for the tags
-  *    right margin.  Used in conjunction with TERMP_NOLPAD.
+  *    in various kinds of tagged lists.
-  *
+  *  - TERMP_DANGLE: Do not break the output line at the right margin,
-  *  - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and
+  *    append the next chunk after it even if this one is too long.
-  *    the line is overrun, and don't pad-right if it's underrun.
+  *    To be used together with TERMP_NOBREAK.
-  *
+  *  - TERMP_HANG: Like TERMP_DANGLE, and also suppress padding before
-  *  - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when
+  *    the next chunk if this column is not full.
-  *    overruning, instead save the position and continue at that point
-  *    when the next invocation.
-  *
-  *  In-line line breaking:
-  *
-  *  If TERMP_NOBREAK is specified and the line overruns the right
-  *  margin, it will break and pad-right to the right margin after
-  *  writing.  If maxrmargin is violated, it will break and continue
-  *  writing from the right-margin, which will lead to the above
-  *  scenario upon exit.
-  *
-  *  Otherwise, the line will break at the right margin.  Extremely long
-  *  lines will cause the system to emit a warning (TODO: hyphenate, if
-  *  possible).
-  *
-  *  FIXME: newline breaks occur (in groff) also occur when a single
-  *  space follows a NOBREAK!
   */
  void
  term_flushln(struct termp *p)
  {
-         int              i, j;
+         size_t           i;     /* current input position in p->buf */
-         size_t           vbl, vsz, vis, maxvis, mmax, bp;
+         int              ntab;  /* number of tabs to prepend */
-         static int       sv = -1;
+         size_t           vis;   /* current visual position on output */
+         size_t           vbl;   /* number of blanks to prepend to output */
+         size_t           vend;  /* end of word visual position on output */
+         size_t           bp;    /* visual right border position */
+         size_t           dv;    /* temporary for visual pos calculations */
+         size_t           j;     /* temporary loop index for p->buf */
+         size_t           jhy;   /* last hyph before overflow w/r/t j */
+         size_t           maxvis; /* output position of visible boundary */
          /*
           * First, establish the maximum columns of "visible" content.
           * This is usually the difference between the right-margin and
           * an indentation, but can be, for tagged lists or columns, a
           * small set of values.
+          *
+          * The following unsigned-signed subtractions look strange,
+          * but they are actually correct.  If the int p->overstep
+          * is negative, it gets sign extended.  Subtracting that
+          * very large size_t effectively adds a small number to dv.
           */
+         dv = p->rmargin > p->offset ? p->rmargin - p->offset : 0;
+         maxvis = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0;
-         assert(p->offset < p->rmargin);
+         if (p->flags & TERMP_NOBREAK) {
-         maxvis = p->rmargin - p->offset;
+                 dv = p->maxrmargin > p->offset ?
-         mmax = p->maxrmargin - p->offset;
+                      p->maxrmargin - p->offset : 0;
-         bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
+                 bp = (int)dv > p->overstep ?
-         vis = 0;
+                      dv - (size_t)p->overstep : 0;
+         } else
+                 bp = maxvis;
-         if (sv >= 0) {
-                 vis = (size_t)sv;
-                 sv = -1;
-         }
          /*
-          * If in the standard case (left-justified), then begin with our
+          * Calculate the required amount of padding.
-          * indentation, otherwise (columns, etc.) just start spitting
-          * out text.
           */
+         vbl = p->offset + p->overstep > p->viscol ?
+               p->offset + p->overstep - p->viscol : 0;
-         if ( ! (p->flags & TERMP_NOLPAD))
+         vis = vend = 0;
-                 /* LINTED */
+         i = 0;
-                 for (j = 0; j < (int)p->offset; j++)
-                         putchar(' ');
-         for (i = 0; i < (int)p->col; i++) {
+         while (i < p->col) {
                  /*
+                  * Handle literal tab characters: collapse all
+                  * subsequent tabs into a single huge set of spaces.
+                  */
+                 ntab = 0;
+                 while (i < p->col && '\t' == p->buf[i]) {
+                         vend = (vis / p->tabwidth + 1) * p->tabwidth;
+                         vbl += vend - vis;
+                         vis = vend;
+                         ntab++;
+                         i++;
+                 }
+                 /*
                   * Count up visible word characters.  Control sequences
                   * (starting with the CSI) aren't counted.  A space
                   * generates a non-printing word, which is valid (the
                   * space is printed according to regular spacing rules).
                   */
-                 /* LINTED */
+                 for (j = i, jhy = 0; j < p->col; j++) {
-                 for (j = i, vsz = 0; j < (int)p->col; j++) {
+                         if (' ' == p->buf[j] || '\t' == p->buf[j])
-                         if (' ' == p->buf[j])
                                  break;
-                         else if (8 == p->buf[j])
-                                 vsz--;
+                         /* Back over the last printed character. */
-                         else
+                         if (8 == p->buf[j]) {
-                                 vsz++;
+                                 assert(j);
+                                 vend -= (*p->width)(p, p->buf[j - 1]);
+                                 continue;
+                         }
+                         /* Regular word. */
+                         /* Break at the hyphen point if we overrun. */
+                         if (vend > vis && vend < bp &&
+                             (ASCII_HYPH == p->buf[j] ||
+                              ASCII_BREAK == p->buf[j]))
+                                 jhy = j;
+                         /*
+                          * Hyphenation now decided, put back a real
+                          * hyphen such that we get the correct width.
+                          */
+                         if (ASCII_HYPH == p->buf[j])
+                                 p->buf[j] = '-';
+                         vend += (*p->width)(p, p->buf[j]);
                  }
                  /*
-                  * Choose the number of blanks to prepend: no blank at the
-                  * beginning of a line, one between words -- but do not
-                  * actually write them yet.
-                  */
-                 vbl = (size_t)(0 == vis ? 0 : 1);
-                 /*
                   * Find out whether we would exceed the right margin.
-                  * If so, break to the next line.  (TODO: hyphenate)
+                  * If so, break to the next line.
-                  * Otherwise, write the chosen number of blanks now.
                   */
-                 if (vis && vis + vbl + vsz > bp) {
+                 if (vend > bp && 0 == jhy && vis > 0) {
-                         putchar('\n');
+                         vend -= vis;
-                         if (TERMP_NOBREAK & p->flags) {
+                         (*p->endline)(p);
-                                 for (j = 0; j < (int)p->rmargin; j++)
+                         p->viscol = 0;
-                                         putchar(' ');
+                         if (TERMP_BRIND & p->flags) {
-                                 vis = p->rmargin - p->offset;
+                                 vbl = p->rmargin;
-                         } else {
+                                 vend += p->rmargin;
-                                 for (j = 0; j < (int)p->offset; j++)
+                                 vend -= p->offset;
-                                         putchar(' ');
+                         } else
-                                 vis = 0;
+                                 vbl = p->offset;
-                         }
-                 } else {
+                         /* use pending tabs on the new line */
-                         for (j = 0; j < (int)vbl; j++)
-                                 putchar(' ');
+                         if (0 < ntab)
-                         vis += vbl;
+                                 vbl += ntab * p->tabwidth;
+                         /*
+                          * Remove the p->overstep width.
+                          * Again, if p->overstep is negative,
+                          * sign extension does the right thing.
+                          */
+                         bp += (size_t)p->overstep;
+                         p->overstep = 0;
                  }
-                 /*
+                 /* Write out the [remaining] word. */
-                  * Finally, write out the word.
+                 for ( ; i < p->col; i++) {
-                  */
+                         if (vend > bp && jhy > 0 && i > jhy)
-                 for ( ; i < (int)p->col; i++) {
-                         if (' ' == p->buf[i])
                                  break;
-                         putchar(p->buf[i]);
+                         if ('\t' == p->buf[i])
+                                 break;
+                         if (' ' == p->buf[i]) {
+                                 j = i;
+                                 while (i < p->col && ' ' == p->buf[i])
+                                         i++;
+                                 dv = (i - j) * (*p->width)(p, ' ');
+                                 vbl += dv;
+                                 vend += dv;
+                                 break;
+                         }
+                         if (ASCII_NBRSP == p->buf[i]) {
+                                 vbl += (*p->width)(p, ' ');
+                                 continue;
+                         }
+                         if (ASCII_BREAK == p->buf[i])
+                                 continue;
+                         /*
+                          * Now we definitely know there will be
+                          * printable characters to output,
+                          * so write preceding white space now.
+                          */
+                         if (vbl) {
+                                 (*p->advance)(p, vbl);
+                                 p->viscol += vbl;
+                                 vbl = 0;
+                         }
+                         (*p->letter)(p, p->buf[i]);
+                         if (8 == p->buf[i])
+                                 p->viscol -= (*p->width)(p, p->buf[i-1]);
+                         else
+                                 p->viscol += (*p->width)(p, p->buf[i]);
                  }
-                 vis += vsz;
+                 vis = vend;
          }
          /*
-          * If we've overstepped our maximum visible no-break space, then
+          * If there was trailing white space, it was not printed;
-          * cause a newline and offset at the right margin.
+          * so reset the cursor position accordingly.
           */
+         if (vis > vbl)
+                 vis -= vbl;
+         else
+                 vis = 0;
-         if ((TERMP_NOBREAK & p->flags) && vis >= maxvis) {
+         p->col = 0;
-                 if ( ! (TERMP_DANGLE & p->flags) &&
+         p->overstep = 0;
-                                 ! (TERMP_HANG & p->flags)) {
+         p->flags &= ~(TERMP_BACKAFTER | TERMP_BACKBEFORE);
-                         putchar('\n');
-                         for (i = 0; i < (int)p->rmargin; i++)
+         if ( ! (TERMP_NOBREAK & p->flags)) {
-                                 putchar(' ');
+                 p->viscol = 0;
-                 }
+                 (*p->endline)(p);
-                 if (TERMP_HANG & p->flags)
-                         sv = (int)(vis - maxvis);
-                 p->col = 0;
                  return;
          }
-         /*
+         if (TERMP_HANG & p->flags) {
-          * If we're not to right-marginalise it (newline), then instead
+                 p->overstep += (int)(p->offset + vis - p->rmargin +
-          * pad to the right margin and stay off.
+                     p->trailspace * (*p->width)(p, ' '));
-          */
-         if (p->flags & TERMP_NOBREAK) {
+                 /*
-                 if ( ! (TERMP_DANGLE & p->flags))
+                  * If we have overstepped the margin, temporarily move
-                         for ( ; vis < maxvis; vis++)
+                  * it to the right and flag the rest of the line to be
-                                 putchar(' ');
+                  * shorter.
-         } else
+                  * If there is a request to keep the columns together,
-                 putchar('\n');
+                  * allow negative overstep when the column is not full.
+                  */
+                 if (p->trailspace && p->overstep < 0)
+                         p->overstep = 0;
+                 return;
-         p->col = 0;
+         } else if (TERMP_DANGLE & p->flags)
- }
+                 return;
+         /* Trailing whitespace is significant in some columns. */
+         if (vis && vbl && (TERMP_BRTRSP & p->flags))
+                 vis += vbl;
- /*
+         /* If the column was overrun, break the line. */
+         if (maxvis < vis + p->trailspace * (*p->width)(p, ' ')) {
+                 (*p->endline)(p);
+                 p->viscol = 0;
+         }
+ }
+ /*
   * A newline only breaks an existing line; it won't assert vertical
   * space.  All data in the output buffer is flushed prior to the newline
   * assertion.
-Line 344  term_newln(struct termp *p)
+Line 314  term_newln(struct termp *p)
  {
          p->flags |= TERMP_NOSPACE;
-         if (0 == p->col) {
+         if (p->col || p->viscol)
-                 p->flags &= ~TERMP_NOLPAD;
+                 term_flushln(p);
-                 return;
-         }
-         term_flushln(p);
-         p->flags &= ~TERMP_NOLPAD;
  }
  /*
   * Asserts a vertical space (a full, empty line-break between lines).
   * Note that if used twice, this will cause two blank spaces and so on.
-Line 364  term_vspace(struct termp *p)
+Line 329  term_vspace(struct termp *p)
  {
          term_newln(p);
-         putchar('\n');
+         p->viscol = 0;
+         if (0 < p->skipvsp)
+                 p->skipvsp--;
+         else
+                 (*p->endline)(p);
  }
+ /* Swap current and previous font; for \fP and .ft P */
+ void
+ term_fontlast(struct termp *p)
+ {
+         enum termfont    f;
- /*
+         f = p->fontl;
-  * Determine the symbol indicated by an escape sequences, that is, one
+         p->fontl = p->fontq[p->fonti];
-  * starting with a backslash.  Once done, we pass this value into the
+         p->fontq[p->fonti] = f;
-  * output buffer by way of the symbol table.
+ }
-  */
- static void
+ /* Set font, save current, discard previous; for \f, .ft, .B etc. */
- term_nescape(struct termp *p, const char *word, size_t len)
+ void
+ term_fontrepl(struct termp *p, enum termfont f)
  {
-         const char      *rhs;
-         size_t           sz;
-         int              i;
-         rhs = term_a2ascii(p->symtab, word, len, &sz);
+         p->fontl = p->fontq[p->fonti];
+         p->fontq[p->fonti] = f;
+ }
-         if (rhs)
+ /* Set font, save previous. */
-                 for (i = 0; i < (int)sz; i++)
+ void
-                         term_encodea(p, rhs[i]);
+ term_fontpush(struct termp *p, enum termfont f)
+ {
+         p->fontl = p->fontq[p->fonti];
+         if (++p->fonti == p->fontsz) {
+                 p->fontsz += 8;
+                 p->fontq = mandoc_reallocarray(p->fontq,
+                     p->fontsz, sizeof(*p->fontq));
+         }
+         p->fontq[p->fonti] = f;
  }
+ /* Flush to make the saved pointer current again. */
+ void
+ term_fontpopq(struct termp *p, int i)
+ {
+         assert(i >= 0);
+         if (p->fonti > i)
+                 p->fonti = i;
+ }
+ /* Pop one font off the stack. */
+ void
+ term_fontpop(struct termp *p)
+ {
+         assert(p->fonti);
+         p->fonti--;
+ }
  /*
-  * Handle an escape sequence: determine its length and pass it to the
+  * Handle pwords, partial words, which may be either a single word or a
-  * escape-symbol look table.  Note that we assume mdoc(3) has validated
+  * phrase that cannot be broken down (such as a literal string).  This
-  * the escape sequence (we assert upon badly-formed escape sequences).
+  * handles word styling.
   */
- static void
+ void
- term_pescape(struct termp *p, const char **word)
+ term_word(struct termp *p, const char *word)
  {
-         int              j;
+         const char       nbrsp[2] = { ASCII_NBRSP, 0 };
-         const char      *wp;
+         const char      *seq, *cp;
+         int              sz, uc;
+         size_t           ssz;
+         enum mandoc_esc  esc;
-         wp = *word;
+         if ( ! (TERMP_NOSPACE & p->flags)) {
+                 if ( ! (TERMP_KEEP & p->flags)) {
-         if (0 == *(++wp)) {
+                         bufferc(p, ' ');
-                 *word = wp;
+                         if (TERMP_SENTENCE & p->flags)
-                 return;
+                                 bufferc(p, ' ');
+                 } else
+                         bufferc(p, ASCII_NBRSP);
          }
+         if (TERMP_PREKEEP & p->flags)
+                 p->flags |= TERMP_KEEP;
-         if ('(' == *wp) {
+         if ( ! (p->flags & TERMP_NONOSPACE))
-                 wp++;
+                 p->flags &= ~TERMP_NOSPACE;
-                 if (0 == *wp || 0 == *(wp + 1)) {
+         else
-                         *word = 0 == *wp ? wp : wp + 1;
+                 p->flags |= TERMP_NOSPACE;
-                         return;
-                 }
-                 term_nescape(p, wp, 2);
+         p->flags &= ~(TERMP_SENTENCE | TERMP_NONEWLINE);
-                 *word = ++wp;
+         p->skipvsp = 0;
-                 return;
-         } else if ('*' == *wp) {
+         while ('\0' != *word) {
-                 if (0 == *(++wp)) {
+                 if ('\\' != *word) {
-                         *word = wp;
+                         if (TERMP_NBRWORD & p->flags) {
-                         return;
+                                 if (' ' == *word) {
+                                         encode(p, nbrsp, 1);
+                                         word++;
+                                         continue;
+                                 }
+                                 ssz = strcspn(word, "\\ ");
+                         } else
+                                 ssz = strcspn(word, "\\");
+                         encode(p, word, ssz);
+                         word += (int)ssz;
+                         continue;
                  }
-                 switch (*wp) {
+                 word++;
-                 case ('('):
+                 esc = mandoc_escape(&word, &seq, &sz);
-                         wp++;
+                 if (ESCAPE_ERROR == esc)
-                         if (0 == *wp || 0 == *(wp + 1)) {
+                         continue;
-                                 *word = 0 == *wp ? wp : wp + 1;
-                                 return;
-                         }
-                         term_nescape(p, wp, 2);
+                 switch (esc) {
-                         *word = ++wp;
+                 case ESCAPE_UNICODE:
-                         return;
+                         uc = mchars_num2uc(seq + 1, sz - 1);
-                 case ('['):
                          break;
-                 default:
+                 case ESCAPE_NUMBERED:
-                         term_nescape(p, wp, 1);
+                         uc = mchars_num2char(seq, sz);
-                         *word = wp;
+                         if (uc < 0)
-                         return;
+                                 continue;
-                 }
-         } else if ('f' == *wp) {
-                 if (0 == *(++wp)) {
-                         *word = wp;
-                         return;
-                 }
-                 switch (*wp) {
-                 case ('B'):
-                         p->flags |= TERMP_BOLD;
                          break;
-                 case ('I'):
+                 case ESCAPE_SPECIAL:
-                         p->flags |= TERMP_UNDER;
+                         if (p->enc == TERMENC_ASCII) {
-                         break;
+                                 cp = mchars_spec2str(seq, sz, &ssz);
-                 case ('P'):
+                                 if (cp != NULL)
-                         /* FALLTHROUGH */
+                                         encode(p, cp, ssz);
-                 case ('R'):
+                         } else {
-                         p->flags &= ~TERMP_STYLE;
+                                 uc = mchars_spec2cp(seq, sz);
-                         break;
+                                 if (uc > 0)
+                                         encode1(p, uc);
+                         }
+                         continue;
+                 case ESCAPE_FONTBOLD:
+                         term_fontrepl(p, TERMFONT_BOLD);
+                         continue;
+                 case ESCAPE_FONTITALIC:
+                         term_fontrepl(p, TERMFONT_UNDER);
+                         continue;
+                 case ESCAPE_FONTBI:
+                         term_fontrepl(p, TERMFONT_BI);
+                         continue;
+                 case ESCAPE_FONT:
+                 case ESCAPE_FONTROMAN:
+                         term_fontrepl(p, TERMFONT_NONE);
+                         continue;
+                 case ESCAPE_FONTPREV:
+                         term_fontlast(p);
+                         continue;
+                 case ESCAPE_NOSPACE:
+                         if (p->flags & TERMP_BACKAFTER)
+                                 p->flags &= ~TERMP_BACKAFTER;
+                         else if (*word == '\0')
+                                 p->flags |= (TERMP_NOSPACE | TERMP_NONEWLINE);
+                         continue;
+                 case ESCAPE_SKIPCHAR:
+                         p->flags |= TERMP_BACKAFTER;
+                         continue;
+                 case ESCAPE_OVERSTRIKE:
+                         cp = seq + sz;
+                         while (seq < cp) {
+                                 if (*seq == '\\') {
+                                         mandoc_escape(&seq, NULL, NULL);
+                                         continue;
+                                 }
+                                 encode1(p, *seq++);
+                                 if (seq < cp) {
+                                         if (p->flags & TERMP_BACKBEFORE)
+                                                 p->flags |= TERMP_BACKAFTER;
+                                         else
+                                                 p->flags |= TERMP_BACKBEFORE;
+                                 }
+                         }
+                         /* Trim trailing backspace/blank pair. */
+                         if (p->col > 2 &&
+                             (p->buf[p->col - 1] == ' ' ||
+                              p->buf[p->col - 1] == '\t'))
+                                 p->col -= 2;
+                         continue;
                  default:
-                         break;
+                         continue;
                  }
-                 *word = wp;
+                 /*
-                 return;
+                  * Common handling for Unicode and numbered
+                  * character escape sequences.
+                  */
-         } else if ('[' != *wp) {
+                 if (p->enc == TERMENC_ASCII) {
-                 term_nescape(p, wp, 1);
+                         cp = ascii_uc2str(uc);
-                 *word = wp;
+                         encode(p, cp, strlen(cp));
-                 return;
+                 } else {
+                         if ((uc < 0x20 && uc != 0x09) ||
+                             (uc > 0x7E && uc < 0xA0))
+                                 uc = 0xFFFD;
+                         encode1(p, uc);
+                 }
          }
+         p->flags &= ~TERMP_NBRWORD;
+ }
-         wp++;
+ static void /*
+  * Enlarge the buffer p->buf until its capacity, p->maxcols, is
+  * strictly greater than sz.  A capacity of zero is first set to
+  * 1024; after that it is multiplied by four per step.  The storage
+  * itself is resized through mandoc_reallocarray().
+  */
-         for (j = 0; *wp && ']' != *wp; wp++, j++)
+ adjbuf(struct termp *p, size_t sz)
-                 /* Loop... */ ;
+ {
-         if (0 == *wp) {
+         if (0 == p->maxcols)
-                 *word = wp;
+                 p->maxcols = 1024;
-                 return;
+         while (sz >= p->maxcols) /* ends once maxcols > sz */
-         }
+                 p->maxcols <<= 2; /* shift by two bits: times four */
-         term_nescape(p, wp - j, (size_t)j);
+         p->buf = mandoc_reallocarray(p->buf, p->maxcols, sizeof(int)); /* maxcols cells of sizeof(int) */
-         *word = wp;
  }
+ /*
+  * Store the single character c at the current column of the output
+  * buffer and advance the column by one.  The buffer is enlarged
+  * through adjbuf() beforehand when col + 1 has reached the capacity.
+  */
+ static void
+ bufferc(struct termp *p, char c)
+ {
+         if (p->maxcols <= p->col + 1)
+                 adjbuf(p, p->col + 1);
+         p->buf[p->col] = c;
+         p->col += 1;
+ }
  /*
-  * Handle pwords, partial words, which may be either a single word or a
+  * See encode().
-  * phrase that cannot be broken down (such as a literal string).  This
+  * Do this for a single (probably unicode) value.
-  * handles word styling.
+  * Does not check for non-decorated glyphs.
+  *
+  * Room for seven more cells is reserved first.  The function then
+  * stores, in this order: a backspace when TERMP_BACKBEFORE is set
+  * (or, instead, drops a preceding blank or tab), an underscore
+  * plus backspace for the underlined and bold-italic fonts, a copy
+  * of the glyph plus backspace for the bold and bold-italic fonts,
+  * and finally the value c itself.  The current font is only used
+  * when c is ASCII_HYPH, greater than 127, or isgraph(); otherwise
+  * TERMFONT_NONE applies.  A pending TERMP_BACKAFTER is turned into
+  * TERMP_BACKBEFORE on the way out.
   */
- void
+ static void
- term_word(struct termp *p, const char *word)
+ encode1(struct termp *p, int c)
  {
-         const char       *sv;
+         enum termfont     f;
-         if (term_isclosedelim(word))
+         if (p->col + 7 >= p->maxcols) /* at most six cells are stored below */
-                 if ( ! (TERMP_IGNDELIM & p->flags))
+                 adjbuf(p, p->col + 7);
-                         p->flags |= TERMP_NOSPACE;
-         if ( ! (TERMP_NOSPACE & p->flags))
+         f = (c == ASCII_HYPH || c > 127 || isgraph(c)) ? /* NOTE(review): a negative c would reach isgraph() -- confirm callers never pass one */
-                 term_chara(p, ' ');
+             p->fontq[p->fonti] : TERMFONT_NONE;
-         if ( ! (p->flags & TERMP_NONOSPACE))
+         if (p->flags & TERMP_BACKBEFORE) {
-                 p->flags &= ~TERMP_NOSPACE;
+                 if (p->buf[p->col - 1] == ' ' || p->buf[p->col - 1] == '\t') /* NOTE(review): reads buf[col - 1]; assumes col > 0 here -- confirm */
+                         p->col--;
+                 else
+                         p->buf[p->col++] = 8; /* 8 is the ASCII backspace */
+                 p->flags &= ~TERMP_BACKBEFORE;
+         }
+         if (TERMFONT_UNDER == f || TERMFONT_BI == f) {
+                 p->buf[p->col++] = '_';
+                 p->buf[p->col++] = 8;
+         }
+         if (TERMFONT_BOLD == f || TERMFONT_BI == f) {
+                 if (ASCII_HYPH == c)
+                         p->buf[p->col++] = '-'; /* the overstruck copy of ASCII_HYPH is a plain '-' */
+                 else
+                         p->buf[p->col++] = c;
+                 p->buf[p->col++] = 8;
+         }
+         p->buf[p->col++] = c;
+         if (p->flags & TERMP_BACKAFTER) {
+                 p->flags |= TERMP_BACKBEFORE;
+                 p->flags &= ~TERMP_BACKAFTER;
+         }
+ }
-         /*
+ static void
-          * If ANSI (word-length styling), then apply our style now,
+ encode(struct termp *p, const char *word, size_t sz)
-          * before the word.
+ {
-          */
+         size_t            i; /*
+          * encode(): append the sz bytes starting at word to the
+          * output buffer of p.  Space for 2 + 5 * sz further cells
+          * is requested up front.  Bytes that are ASCII_HYPH or
+          * isgraph() are passed to encode1(); every other byte is
+          * stored directly.
+          */
-         for (sv = word; *word; word++)
+         if (p->col + 2 + (sz * 5) >= p->maxcols)
-                 if ('\\' != *word)
+                 adjbuf(p, p->col + 2 + (sz * 5));
-                         term_encodea(p, *word);
-                 else
-                         term_pescape(p, &word);
-         if (term_isopendelim(sv))
+         for (i = 0; i < sz; i++) {
-                 p->flags |= TERMP_NOSPACE;
+                 if (ASCII_HYPH == word[i] ||
+                     isgraph((unsigned char)word[i])) /* cast keeps the ctype argument non-negative */
+                         encode1(p, word[i]);
+                 else
+                         p->buf[p->col++] = word[i]; /* stored as is, without going through encode1() */
+         }
  }
+ void /*
+  * term_setwidth(): parse the width string wstr and pass the result
+  * to the p->setwidth callback as (iop, width).  A leading '+' or
+  * '-' sets iop to 1 or -1 and is skipped; without a sign iop is 0.
+  * If a2roffsu() accepts the rest of the string, width becomes the
+  * term_hspan() value of the parsed unit; otherwise width stays 0
+  * and iop is reset to 0.  A NULL wstr yields iop 0 and width 0.
+  * NOTE(review): the sign presumably requests a relative change --
+  * confirm against the setwidth implementations.
+  */
- /*
+ term_setwidth(struct termp *p, const char *wstr)
-  * Insert a single character into the line-buffer.  If the buffer's
-  * space is exceeded, then allocate more space by doubling the buffer
-  * size.
-  */
- static void
- term_chara(struct termp *p, char c)
  {
-         size_t           s;
+         struct roffsu    su;
+         int              iop, width;
-         if (p->col + 1 >= p->maxcols) {
+         iop = 0;
-                 if (0 == p->maxcols)
+         width = 0;
-                         p->maxcols = 256;
+         if (NULL != wstr) {
-                 s = p->maxcols * 2;
+                 switch (*wstr) {
-                 p->buf = realloc(p->buf, s);
+                 case '+':
-                 if (NULL == p->buf)
+                         iop = 1;
-                         err(1, "realloc");
+                         wstr++; /* step over the sign */
-                 p->maxcols = s;
+                         break;
+                 case '-':
+                         iop = -1;
+                         wstr++; /* step over the sign */
+                         break;
+                 default:
+                         break;
+                 }
+                 if (a2roffsu(wstr, &su, SCALE_MAX))
+                         width = term_hspan(p, &su);
+                 else
+                         iop = 0; /* unparsable width: discard the sign as well */
          }
-         p->buf[(int)(p->col)++] = c;
+         (*p->setwidth)(p, iop, width);
  }
+ size_t /*
+  * term_len(): return the value of the p->width callback for a
+  * blank character, multiplied by sz.
+  */
+ term_len(const struct termp *p, size_t sz)
+ {
- static void
+         return (*p->width)(p, ' ') * sz;
- term_encodea(struct termp *p, char c)
+ }
+ static size_t /*
+  * cond_width(): return the p->width callback value for c, unless
+  * *skip is non-zero.  In that case *skip is cleared and 0 is
+  * returned, so exactly one character is left out of the total.
+  */
+ cond_width(const struct termp *p, int c, int *skip)
  {
-         if (' ' != c && TERMP_STYLE & p->flags) {
+         if (*skip) {
-                 if (TERMP_BOLD & p->flags) {
+                 (*skip) = 0; /* the skip request is consumed by this character */
-                         term_chara(p, c);
+                 return 0;
-                         term_chara(p, 8);
+         } else
+                 return (*p->width)(p, c);
+ }
+ /*
+  * Return the display width of the string cp.  Ordinary characters
+  * are measured with the p->width callback (through cond_width(), so
+  * that a pending skip request can suppress one character).  Escape
+  * sequences are decoded with mandoc_escape() and measured according
+  * to their kind; ASCII_NBRSP counts as a blank, ASCII_HYPH as '-',
+  * and ASCII_BREAK adds nothing.
+  */
+ size_t
+ term_strlen(const struct termp *p, const char *cp)
+ {
+         size_t           sz, rsz, i;
+         int              ssz, skip, uc;
+         const char      *seq, *rhs;
+         enum mandoc_esc  esc;
+         static const char rej[] = { '\\', ASCII_NBRSP, ASCII_HYPH,
+                         ASCII_BREAK, '\0' };
+         /*
+          * Account for escaped sequences within string length
+          * calculations.  This follows the logic in term_word() as we
+          * must calculate the width of produced strings.
+          */
+         sz = 0;
+         skip = 0;
+         while ('\0' != *cp) {
+                 /* Measure the run up to the next character in rej[]. */
+                 rsz = strcspn(cp, rej);
+                 for (i = 0; i < rsz; i++)
+                         sz += cond_width(p, *cp++, &skip);
+                 switch (*cp) {
+                 case '\\':
+                         cp++;
+                         esc = mandoc_escape(&cp, &seq, &ssz);
+                         if (ESCAPE_ERROR == esc)
+                                 continue;
+                         rhs = NULL;
+                         switch (esc) {
+                         case ESCAPE_UNICODE:
+                                 uc = mchars_num2uc(seq + 1, ssz - 1);
+                                 break;
+                         case ESCAPE_NUMBERED:
+                                 uc = mchars_num2char(seq, ssz);
+                                 if (uc < 0)
+                                         continue;
+                                 break;
+                         case ESCAPE_SPECIAL:
+                                 if (p->enc == TERMENC_ASCII) {
+                                         rhs = mchars_spec2str(seq, ssz, &rsz);
+                                         if (rhs != NULL)
+                                                 break;
+                                 } else {
+                                         uc = mchars_spec2cp(seq, ssz);
+                                         if (uc > 0)
+                                                 sz += cond_width(p, uc, &skip);
+                                 }
+                                 continue;
+                         case ESCAPE_SKIPCHAR:
+                                 /* The next counted character adds no width. */
+                                 skip = 1;
+                                 continue;
+                         case ESCAPE_OVERSTRIKE:
+                                 /* Count only the widest component. */
+                                 rsz = 0;
+                                 rhs = seq + ssz;
+                                 while (seq < rhs) {
+                                         if (*seq == '\\') {
+                                                 mandoc_escape(&seq, NULL, NULL);
+                                                 continue;
+                                         }
+                                         i = (*p->width)(p, *seq++);
+                                         if (rsz < i)
+                                                 rsz = i;
+                                 }
+                                 sz += rsz;
+                                 continue;
+                         default:
+                                 continue;
+                         }
+                         /*
+                          * Common handling for Unicode and numbered
+                          * character escape sequences.
+                          */
+                         if (rhs == NULL) {
+                                 if (p->enc == TERMENC_ASCII) {
+                                         rhs = ascii_uc2str(uc);
+                                         rsz = strlen(rhs);
+                                 } else {
+                                         if ((uc < 0x20 && uc != 0x09) ||
+                                             (uc > 0x7E && uc < 0xA0))
+                                                 uc = 0xFFFD;
+                                         sz += cond_width(p, uc, &skip);
+                                         continue;
+                                 }
+                         }
+                         /* A pending skip swallows the whole replacement. */
+                         if (skip) {
+                                 skip = 0;
+                                 break;
+                         }
+                         /*
+                          * Common handling for all escape sequences
+                          * printing more than one character.
+                          */
+                         for (i = 0; i < rsz; i++)
+                                 sz += (*p->width)(p, *rhs++);
+                         break;
+                 case ASCII_NBRSP:
+                         sz += cond_width(p, ' ', &skip);
+                         cp++;
+                         break;
+                 case ASCII_HYPH:
+                         sz += cond_width(p, '-', &skip);
+                         cp++;
+                         break;
+                 case ASCII_BREAK:
+                         /*
+                          * Adds no width, but must be stepped over:
+                          * it is listed in rej[], so leaving cp in
+                          * place would repeat this iteration forever.
+                          */
+                         cp++;
+                         break;
+                 default:
+                         /* Only the terminating NUL ends up here. */
+                         break;
                  }
-                 if (TERMP_UNDER & p->flags) {
-                         term_chara(p, '_');
-                         term_chara(p, 8);
-                 }
          }
-         term_chara(p, c);
+         return sz;
+ }
+ /*
+  * Convert the scaled vertical distance su into an integer count.
+  * Each unit has its own factor (an inch counts as 6.0, which
+  * suggests that the result is in output lines -- confirm against
+  * callers).  The value is rounded by adding or subtracting 0.4995
+  * before truncation.  Results of 66 or more are replaced by 1.
+  * An unknown unit aborts the program.  The argument p is unused.
+  */
+ int
+ term_vspan(const struct termp *p, const struct roffsu *su)
+ {
+         double           span;
+         int              rounded;
+         switch (su->unit) {
+         case SCALE_PC:
+         case SCALE_VS:
+                 span = su->scale;
+                 break;
+         case SCALE_IN:
+                 span = su->scale * 6.0;
+                 break;
+         case SCALE_CM:
+                 span = su->scale * 6.0 / 2.54;
+                 break;
+         case SCALE_PT:
+                 span = su->scale / 12.0;
+                 break;
+         case SCALE_EM:
+         case SCALE_EN:
+                 span = su->scale * 0.6;
+                 break;
+         case SCALE_MM:
+                 span = su->scale * 0.006;
+                 break;
+         case SCALE_BU:
+                 span = su->scale / 40.0;
+                 break;
+         case SCALE_FS:
+                 span = su->scale * 65536.0 / 40.0;
+                 break;
+         default:
+                 abort();
+         }
+         if (span > 0.0)
+                 rounded = span + 0.4995;
+         else
+                 rounded = span - 0.4995;
+         if (rounded >= 66)
+                 return 1;
+         return rounded;
+ }
+ /*
+  * Convert the scaled width su for the output device p.  The work
+  * is done entirely by the hspan callback stored in p; the result
+  * is described as basic units, rounded down, which is a property
+  * of that callback and should be confirmed there.
+  */
+ int
+ term_hspan(const struct termp *p, const struct roffsu *su)
+ {
+         int              span;
+         span = p->hspan(p, su);
+         return span;
  }

CVSweb