mandoc/term.c - diff

Return to term.c CVS log

Up to [cvsweb.bsd.lv] / mandoc

Diff for /mandoc/term.c between version 1.113 and 1.264

-version 1.113, 2009/10/26 17:05:44
+version 1.264, 2017/06/04 22:44:15
 Line 1
  /*      $Id$ */
  /*
-  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
+  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+  * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
   *
   * Permission to use, copy, modify, and distribute this software for any
   * purpose with or without fee is hereby granted, provided that the above
   * copyright notice and this permission notice appear in all copies.
   *
-  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
   * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
   * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
   * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
   * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
   * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
   */
+ #include "config.h"
+ #include <sys/types.h>
  #include <assert.h>
- #include <err.h>
+ #include <ctype.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
- #include <time.h>
- #include "chars.h"
+ #include "mandoc.h"
+ #include "mandoc_aux.h"
  #include "out.h"
  #include "term.h"
- #include "man.h"
- #include "mdoc.h"
  #include "main.h"
- /* FIXME: accomodate non-breaking, non-collapsing white-space. */
+ static  size_t           cond_width(const struct termp *, int, int *);
- /* FIXME: accomodate non-breaking, collapsing white-space. */
+ static  void             adjbuf(struct termp *p, size_t);
+ static  void             bufferc(struct termp *, char);
+ static  void             encode(struct termp *, const char *, size_t);
+ static  void             encode1(struct termp *, int);
+ static  void             endline(struct termp *);
- static  struct termp     *term_alloc(enum termenc);
- static  void              term_free(struct termp *);
- static  void              do_escaped(struct termp *, const char **);
+ void
- static  void              do_special(struct termp *,
+ term_free(struct termp *p)
-                                 const char *, size_t);
- static  void              do_reserved(struct termp *,
-                                 const char *, size_t);
- static  void              buffer(struct termp *, char);
- static  void              encode(struct termp *, char);
- void *
- ascii_alloc(void)
  {
-         return(term_alloc(TERMENC_ASCII));
+         free(p->buf);
+         free(p->fontq);
+         free(p);
  }
  void
- terminal_free(void *arg)
+ term_begin(struct termp *p, term_margin head,
+                 term_margin foot, const struct roff_meta *arg)
  {
-         term_free((struct termp *)arg);
+         p->headf = head;
+         p->footf = foot;
+         p->argf = arg;
+         (*p->begin)(p);
  }
+ void
- static void
+ term_end(struct termp *p)
- term_free(struct termp *p)
  {
-         if (p->buf)
+         (*p->end)(p);
-                 free(p->buf);
-         if (p->symtab)
-                 chars_free(p->symtab);
-         free(p);
  }
- static struct termp *
- term_alloc(enum termenc enc)
- {
-         struct termp *p;
-         if (NULL == (p = malloc(sizeof(struct termp))))
-                 return(NULL);
-         bzero(p, sizeof(struct termp));
-         p->maxrmargin = 78;
-         p->enc = enc;
-         return(p);
- }
  /*
-  * Flush a line of text.  A "line" is loosely defined as being something
+  * Flush a chunk of text.  By default, break the output line each time
-  * that should be followed by a newline, regardless of whether it's
+  * the right margin is reached, and continue output on the next line
-  * broken apart by newlines getting there.  A line can also be a
+  * at the same offset as the chunk itself.  By default, also break the
-  * fragment of a columnar list.
+  * output line at the end of the chunk.
+  * The following flags may be specified:
   *
-  * Specifically, a line is whatever's in p->buf of length p->col, which
+  *  - TERMP_NOBREAK: Do not break the output line at the right margin,
-  * is zeroed after this function returns.
+  *    but only at the max right margin.  Also, do not break the output
-  *
+  *    line at the end of the chunk, such that the next call can pad to
-  * The usage of termp:flags is as follows:
+  *    the next column.  However, if less than p->trailspace blanks,
-  *
+  *    which can be 0, 1, or 2, remain to the right margin, the line
-  *  - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
+  *    will be broken.
-  *    offset value.  This is useful when doing columnar lists where the
+  *  - TERMP_BRTRSP: Consider trailing whitespace significant
-  *    prior column has right-padded.
+  *    when deciding whether the chunk fits or not.
-  *
+  *  - TERMP_BRIND: If the chunk does not fit and the output line has
-  *  - TERMP_NOBREAK: this is the most important and is used when making
+  *    to be broken, start the next line at the right margin instead
-  *    columns.  In short: don't print a newline and instead pad to the
+  *    of at the offset.  Used together with TERMP_NOBREAK for the tags
-  *    right margin.  Used in conjunction with TERMP_NOLPAD.
+  *    in various kinds of tagged lists.
-  *
+  *  - TERMP_HANG: Do not break the output line at the right margin,
-  *  - TERMP_TWOSPACE: when padding, make sure there are at least two
+  *    append the next chunk after it even if this one is too long.
-  *    space characters of padding.  Otherwise, rather break the line.
+  *    To be used together with TERMP_NOBREAK.
-  *
+  *  - TERMP_NOPAD: Start writing at the current position,
-  *  - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and
+  *    do not pad with blank characters up to the offset.
-  *    the line is overrun, and don't pad-right if it's underrun.
-  *
-  *  - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when
-  *    overruning, instead save the position and continue at that point
-  *    when the next invocation.
-  *
-  *  In-line line breaking:
-  *
-  *  If TERMP_NOBREAK is specified and the line overruns the right
-  *  margin, it will break and pad-right to the right margin after
-  *  writing.  If maxrmargin is violated, it will break and continue
-  *  writing from the right-margin, which will lead to the above
-  *  scenario upon exit.
-  *
-  *  Otherwise, the line will break at the right margin.  Extremely long
-  *  lines will cause the system to emit a warning (TODO: hyphenate, if
-  *  possible).
   */
  void
  term_flushln(struct termp *p)
  {
-         int              i, j;
+         size_t           i;     /* current input position in p->buf */
-         size_t           vbl, vsz, vis, maxvis, mmax, bp;
+         int              ntab;  /* number of tabs to prepend */
-         static int       overstep = 0;
+         size_t           vis;   /* current visual position on output */
+         size_t           vbl;   /* number of blanks to prepend to output */
+         size_t           vend;  /* end of word visual position on output */
+         size_t           bp;    /* visual right border position */
+         size_t           dv;    /* temporary for visual pos calculations */
+         size_t           j;     /* temporary loop index for p->buf */
+         size_t           jhy;   /* last hyph before overflow w/r/t j */
+         size_t           maxvis; /* output position of visible boundary */
-         /*
+         vbl = (p->flags & TERMP_NOPAD) || p->offset < p->viscol ? 0 :
-          * First, establish the maximum columns of "visible" content.
+             p->offset - p->viscol;
-          * This is usually the difference between the right-margin and
+         if (p->minbl && vbl < p->minbl)
-          * an indentation, but can be, for tagged lists or columns, a
+                 vbl = p->minbl;
-          * small set of values.
+         maxvis = p->rmargin > p->viscol + vbl ?
-          */
+             p->rmargin - p->viscol - vbl : 0;
+         bp = !(p->flags & TERMP_NOBREAK) ? maxvis :
+             p->maxrmargin > p->viscol + vbl ?
+             p->maxrmargin - p->viscol - vbl : 0;
+         vis = vend = 0;
+         i = 0;
-         assert(p->offset < p->rmargin);
+         while (i < p->col) {
-         assert((int)(p->rmargin - p->offset) - overstep > 0);
+                 /*
+                  * Handle literal tab characters: collapse all
+                  * subsequent tabs into a single huge set of spaces.
+                  */
+                 ntab = 0;
+                 while (i < p->col && p->buf[i] == '\t') {
+                         vend = term_tab_next(vis);
+                         vbl += vend - vis;
+                         vis = vend;
+                         ntab++;
+                         i++;
+                 }
-         maxvis = /* LINTED */
-                 p->rmargin - p->offset - overstep;
-         mmax = /* LINTED */
-                 p->maxrmargin - p->offset - overstep;
-         bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
-         vis = 0;
-         /*
-          * If in the standard case (left-justified), then begin with our
-          * indentation, otherwise (columns, etc.) just start spitting
-          * out text.
-          */
-         if ( ! (p->flags & TERMP_NOLPAD))
-                 /* LINTED */
-                 for (j = 0; j < (int)p->offset; j++)
-                         putchar(' ');
-         for (i = 0; i < (int)p->col; i++) {
                  /*
                   * Count up visible word characters.  Control sequences
                   * (starting with the CSI) aren't counted.  A space
-Line 171  term_flushln(struct termp *p)
+Line 138  term_flushln(struct termp *p)
                   * space is printed according to regular spacing rules).
                   */
-                 /* LINTED */
+                 for (j = i, jhy = 0; j < p->col; j++) {
-                 for (j = i, vsz = 0; j < (int)p->col; j++) {
+                         if (' ' == p->buf[j] || '\t' == p->buf[j])
-                         if (j && ' ' == p->buf[j])
                                  break;
-                         else if (8 == p->buf[j])
-                                 vsz--;
+                         /* Back over the last printed character. */
-                         else
+                         if (8 == p->buf[j]) {
-                                 vsz++;
+                                 assert(j);
+                                 vend -= (*p->width)(p, p->buf[j - 1]);
+                                 continue;
+                         }
+                         /* Regular word. */
+                         /* Break at the hyphen point if we overrun. */
+                         if (vend > vis && vend < bp &&
+                             (ASCII_HYPH == p->buf[j] ||
+                              ASCII_BREAK == p->buf[j]))
+                                 jhy = j;
+                         /*
+                          * Hyphenation now decided, put back a real
+                          * hyphen such that we get the correct width.
+                          */
+                         if (ASCII_HYPH == p->buf[j])
+                                 p->buf[j] = '-';
+                         vend += (*p->width)(p, p->buf[j]);
                  }
                  /*
-                  * Choose the number of blanks to prepend: no blank at the
-                  * beginning of a line, one between words -- but do not
-                  * actually write them yet.
-                  */
-                 vbl = (size_t)(0 == vis ? 0 : 1);
-                 /*
                   * Find out whether we would exceed the right margin.
-                  * If so, break to the next line.  (TODO: hyphenate)
+                  * If so, break to the next line.
-                  * Otherwise, write the chosen number of blanks now.
                   */
-                 if (vis && vis + vbl + vsz > bp) {
+                 if (vend > bp && 0 == jhy && vis > 0 &&
-                         putchar('\n');
+                     (p->flags & TERMP_BRNEVER) == 0) {
-                         if (TERMP_NOBREAK & p->flags) {
+                         vend -= vis;
-                                 for (j = 0; j < (int)p->rmargin; j++)
+                         endline(p);
-                                         putchar(' ');
-                                 vis = p->rmargin - p->offset;
+                         /* Use pending tabs on the new line. */
-                         } else {
-                                 for (j = 0; j < (int)p->offset; j++)
+                         vbl = 0;
-                                         putchar(' ');
+                         while (ntab--)
-                                 vis = 0;
+                                 vbl = term_tab_next(vbl);
-                         }
-                         /* Remove the overstep width. */
+                         /* Re-establish indentation. */
-                         bp += (int)/* LINTED */
-                                 overstep;
+                         if (p->flags & TERMP_BRIND)
-                         overstep = 0;
+                                 vbl += p->rmargin;
-                 } else {
+                         else
-                         for (j = 0; j < (int)vbl; j++)
+                                 vbl += p->offset;
-                                 putchar(' ');
+                         maxvis = p->rmargin > vbl ? p->rmargin - vbl : 0;
-                         vis += vbl;
+                         bp = !(p->flags & TERMP_NOBREAK) ? maxvis :
+                             p->maxrmargin > vbl ?  p->maxrmargin - vbl : 0;
                  }
-                 /*
+                 /* Write out the [remaining] word. */
-                  * Finally, write out the word.
+                 for ( ; i < p->col; i++) {
-                  */
+                         if (vend > bp && jhy > 0 && i > jhy)
-                 for ( ; i < (int)p->col; i++) {
-                         if (' ' == p->buf[i])
                                  break;
-                         putchar(p->buf[i]);
+                         if ('\t' == p->buf[i])
-                 }
+                                 break;
-                 vis += vsz;
+                         if (' ' == p->buf[i]) {
-         }
+                                 j = i;
+                                 while (i < p->col && ' ' == p->buf[i])
+                                         i++;
+                                 dv = (i - j) * (*p->width)(p, ' ');
+                                 vbl += dv;
+                                 vend += dv;
+                                 break;
+                         }
+                         if (ASCII_NBRSP == p->buf[i]) {
+                                 vbl += (*p->width)(p, ' ');
+                                 continue;
+                         }
+                         if (ASCII_BREAK == p->buf[i])
+                                 continue;
-         p->col = 0;
+                         /*
-         overstep = 0;
+                          * Now we definitely know there will be
+                          * printable characters to output,
+                          * so write preceding white space now.
+                          */
+                         if (vbl) {
+                                 (*p->advance)(p, vbl);
+                                 p->viscol += vbl;
+                                 vbl = 0;
+                         }
-         if ( ! (TERMP_NOBREAK & p->flags)) {
+                         (*p->letter)(p, p->buf[i]);
-                 putchar('\n');
+                         if (8 == p->buf[i])
-                 return;
+                                 p->viscol -= (*p->width)(p, p->buf[i-1]);
+                         else
+                                 p->viscol += (*p->width)(p, p->buf[i]);
+                 }
+                 vis = vend;
          }
-         if (TERMP_HANG & p->flags) {
+         /*
-                 /* We need one blank after the tag. */
+          * If there was trailing white space, it was not printed;
-                 overstep = /* LINTED */
+          * so reset the cursor position accordingly.
-                         vis - maxvis + 1;
+          */
+         if (vis > vbl)
+                 vis -= vbl;
+         else
+                 vis = 0;
-                 /*
+         p->col = 0;
-                  * Behave exactly the same way as groff:
+         p->minbl = p->trailspace;
-                  * If we have overstepped the margin, temporarily move
+         p->flags &= ~(TERMP_BACKAFTER | TERMP_BACKBEFORE | TERMP_NOPAD);
-                  * it to the right and flag the rest of the line to be
-                  * shorter.
-                  * If we landed right at the margin, be happy.
-                  * If we are one step before the margin, temporarily
-                  * move it one step LEFT and flag the rest of the line
-                  * to be longer.
-                  */
-                 if (overstep >= -1) {
-                         assert((int)maxvis + overstep >= 0);
-                         /* LINTED */
-                         maxvis += overstep;
-                 } else
-                         overstep = 0;
-         } else if (TERMP_DANGLE & p->flags)
+         /* Trailing whitespace is significant in some columns. */
-                 return;
+         if (vis && vbl && (TERMP_BRTRSP & p->flags))
+                 vis += vbl;
-         /* Right-pad. */
+         /* If the column was overrun, break the line. */
-         if (maxvis > vis + /* LINTED */
+         if ((p->flags & TERMP_NOBREAK) == 0 ||
-                         ((TERMP_TWOSPACE & p->flags) ? 1 : 0))
+             ((p->flags & TERMP_HANG) == 0 &&
-                 for ( ; vis < maxvis; vis++)
+              vis + p->trailspace * (*p->width)(p, ' ') > maxvis))
-                         putchar(' ');
+                 endline(p);
-         else {  /* ...or newline break. */
+ }
-                 putchar('\n');
-                 for (i = 0; i < (int)p->rmargin; i++)
+ static void
-                         putchar(' ');
+ endline(struct termp *p)
+ {
+         if ((p->flags & (TERMP_NEWMC | TERMP_ENDMC)) == TERMP_ENDMC) {
+                 p->mc = NULL;
+                 p->flags &= ~TERMP_ENDMC;
          }
+         if (p->mc != NULL) {
+                 if (p->viscol && p->maxrmargin >= p->viscol)
+                         (*p->advance)(p, p->maxrmargin - p->viscol + 1);
+                 p->flags |= TERMP_NOBUF | TERMP_NOSPACE;
+                 term_word(p, p->mc);
+                 p->flags &= ~(TERMP_NOBUF | TERMP_NEWMC);
+         }
+         p->viscol = 0;
+         p->minbl = 0;
+         (*p->endline)(p);
  }
+ /*
- /*
   * A newline only breaks an existing line; it won't assert vertical
   * space.  All data in the output buffer is flushed prior to the newline
   * assertion.
-Line 281  term_newln(struct termp *p)
+Line 287  term_newln(struct termp *p)
  {
          p->flags |= TERMP_NOSPACE;
-         if (0 == p->col) {
+         if (p->col || p->viscol)
-                 p->flags &= ~TERMP_NOLPAD;
+                 term_flushln(p);
-                 return;
-         }
-         term_flushln(p);
-         p->flags &= ~TERMP_NOLPAD;
  }
  /*
   * Asserts a vertical space (a full, empty line-break between lines).
   * Note that if used twice, this will cause two blank spaces and so on.
-Line 301  term_vspace(struct termp *p)
+Line 302  term_vspace(struct termp *p)
  {
          term_newln(p);
-         putchar('\n');
+         p->viscol = 0;
+         p->minbl = 0;
+         if (0 < p->skipvsp)
+                 p->skipvsp--;
+         else
+                 (*p->endline)(p);
  }
+ /* Swap current and previous font; for \fP and .ft P */
+ void
+ term_fontlast(struct termp *p)
+ {
+         enum termfont    f;
- static void
+         f = p->fontl;
- do_special(struct termp *p, const char *word, size_t len)
+         p->fontl = p->fontq[p->fonti];
+         p->fontq[p->fonti] = f;
+ }
+ /* Set font, save current, discard previous; for \f, .ft, .B etc. */
+ void
+ term_fontrepl(struct termp *p, enum termfont f)
  {
-         const char      *rhs;
-         size_t           sz;
-         int              i;
-         rhs = chars_a2ascii(p->symtab, word, len, &sz);
+         p->fontl = p->fontq[p->fonti];
+         p->fontq[p->fonti] = f;
+ }
-         if (NULL == rhs) {
+ /* Set font, save previous. */
- #if 0
+ void
-                 fputs("Unknown special character: ", stderr);
+ term_fontpush(struct termp *p, enum termfont f)
-                 for (i = 0; i < (int)len; i++)
+ {
-                         fputc(word[i], stderr);
-                 fputc('\n', stderr);
+         p->fontl = p->fontq[p->fonti];
- #endif
+         if (++p->fonti == p->fontsz) {
-                 return;
+                 p->fontsz += 8;
+                 p->fontq = mandoc_reallocarray(p->fontq,
+                     p->fontsz, sizeof(*p->fontq));
          }
-         for (i = 0; i < (int)sz; i++)
+         p->fontq[p->fonti] = f;
-                 encode(p, rhs[i]);
  }
+ /* Flush to make the saved pointer current again. */
- static void
+ void
- do_reserved(struct termp *p, const char *word, size_t len)
+ term_fontpopq(struct termp *p, int i)
  {
-         const char      *rhs;
-         size_t           sz;
-         int              i;
-         rhs = chars_a2res(p->symtab, word, len, &sz);
+         assert(i >= 0);
+         if (p->fonti > i)
-         if (NULL == rhs) {
+                 p->fonti = i;
- #if 0
-                 fputs("Unknown reserved word: ", stderr);
-                 for (i = 0; i < (int)len; i++)
-                         fputc(word[i], stderr);
-                 fputc('\n', stderr);
- #endif
-                 return;
-         }
-         for (i = 0; i < (int)sz; i++)
-                 encode(p, rhs[i]);
  }
+ /* Pop one font off the stack. */
+ void
+ term_fontpop(struct termp *p)
+ {
+         assert(p->fonti);
+         p->fonti--;
+ }
  /*
-  * Handle an escape sequence: determine its length and pass it to the
+  * Handle pwords, partial words, which may be either a single word or a
-  * escape-symbol look table.  Note that we assume mdoc(3) has validated
+  * phrase that cannot be broken down (such as a literal string).  This
-  * the escape sequence (we assert upon badly-formed escape sequences).
+  * handles word styling.
   */
- static void
+ void
- do_escaped(struct termp *p, const char **word)
+ term_word(struct termp *p, const char *word)
  {
-         int              j, type;
+         struct roffsu    su;
-         const char      *wp;
+         const char       nbrsp[2] = { ASCII_NBRSP, 0 };
+         const char      *seq, *cp;
+         int              sz, uc;
+         size_t           csz, lsz, ssz;
+         enum mandoc_esc  esc;
-         wp = *word;
+         if ((p->flags & TERMP_NOBUF) == 0) {
-         type = 1;
+                 if ((p->flags & TERMP_NOSPACE) == 0) {
+                         if ((p->flags & TERMP_KEEP) == 0) {
-         if (0 == *(++wp)) {
+                                 bufferc(p, ' ');
-                 *word = wp;
+                                 if (p->flags & TERMP_SENTENCE)
-                 return;
+                                         bufferc(p, ' ');
+                         } else
+                                 bufferc(p, ASCII_NBRSP);
+                 }
+                 if (p->flags & TERMP_PREKEEP)
+                         p->flags |= TERMP_KEEP;
+                 if (p->flags & TERMP_NONOSPACE)
+                         p->flags |= TERMP_NOSPACE;
+                 else
+                         p->flags &= ~TERMP_NOSPACE;
+                 p->flags &= ~(TERMP_SENTENCE | TERMP_NONEWLINE);
+                 p->skipvsp = 0;
          }
-         if ('(' == *wp) {
+         while ('\0' != *word) {
-                 wp++;
+                 if ('\\' != *word) {
-                 if (0 == *wp || 0 == *(wp + 1)) {
+                         if (TERMP_NBRWORD & p->flags) {
-                         *word = 0 == *wp ? wp : wp + 1;
+                                 if (' ' == *word) {
-                         return;
+                                         encode(p, nbrsp, 1);
+                                         word++;
+                                         continue;
+                                 }
+                                 ssz = strcspn(word, "\\ ");
+                         } else
+                                 ssz = strcspn(word, "\\");
+                         encode(p, word, ssz);
+                         word += (int)ssz;
+                         continue;
                  }
-                 do_special(p, wp, 2);
+                 word++;
-                 *word = ++wp;
+                 esc = mandoc_escape(&word, &seq, &sz);
-                 return;
+                 if (ESCAPE_ERROR == esc)
+                         continue;
-         } else if ('*' == *wp) {
+                 switch (esc) {
-                 if (0 == *(++wp)) {
+                 case ESCAPE_UNICODE:
-                         *word = wp;
+                         uc = mchars_num2uc(seq + 1, sz - 1);
-                         return;
-                 }
-                 switch (*wp) {
-                 case ('('):
-                         wp++;
-                         if (0 == *wp || 0 == *(wp + 1)) {
-                                 *word = 0 == *wp ? wp : wp + 1;
-                                 return;
-                         }
-                         do_reserved(p, wp, 2);
-                         *word = ++wp;
-                         return;
-                 case ('['):
-                         type = 0;
                          break;
+                 case ESCAPE_NUMBERED:
+                         uc = mchars_num2char(seq, sz);
+                         if (uc < 0)
+                                 continue;
+                         break;
+                 case ESCAPE_SPECIAL:
+                         if (p->enc == TERMENC_ASCII) {
+                                 cp = mchars_spec2str(seq, sz, &ssz);
+                                 if (cp != NULL)
+                                         encode(p, cp, ssz);
+                         } else {
+                                 uc = mchars_spec2cp(seq, sz);
+                                 if (uc > 0)
+                                         encode1(p, uc);
+                         }
+                         continue;
+                 case ESCAPE_FONTBOLD:
+                         term_fontrepl(p, TERMFONT_BOLD);
+                         continue;
+                 case ESCAPE_FONTITALIC:
+                         term_fontrepl(p, TERMFONT_UNDER);
+                         continue;
+                 case ESCAPE_FONTBI:
+                         term_fontrepl(p, TERMFONT_BI);
+                         continue;
+                 case ESCAPE_FONT:
+                 case ESCAPE_FONTROMAN:
+                         term_fontrepl(p, TERMFONT_NONE);
+                         continue;
+                 case ESCAPE_FONTPREV:
+                         term_fontlast(p);
+                         continue;
+                 case ESCAPE_NOSPACE:
+                         if (p->flags & TERMP_BACKAFTER)
+                                 p->flags &= ~TERMP_BACKAFTER;
+                         else if (*word == '\0')
+                                 p->flags |= (TERMP_NOSPACE | TERMP_NONEWLINE);
+                         continue;
+                 case ESCAPE_HORIZ:
+                         if (a2roffsu(seq, &su, SCALE_EM) == 0)
+                                 continue;
+                         uc = term_hspan(p, &su) / 24;
+                         if (uc > 0)
+                                 while (uc-- > 0)
+                                         bufferc(p, ASCII_NBRSP);
+                         else if (p->col > (size_t)(-uc))
+                                 p->col += uc;
+                         else {
+                                 uc += p->col;
+                                 p->col = 0;
+                                 if (p->offset > (size_t)(-uc)) {
+                                         p->ti += uc;
+                                         p->offset += uc;
+                                 } else {
+                                         p->ti -= p->offset;
+                                         p->offset = 0;
+                                 }
+                         }
+                         continue;
+                 case ESCAPE_HLINE:
+                         if (a2roffsu(seq, &su, SCALE_EM) == 0)
+                                 continue;
+                         uc = term_hspan(p, &su) / 24;
+                         if (uc <= 0) {
+                                 if (p->rmargin <= p->offset)
+                                         continue;
+                                 lsz = p->rmargin - p->offset;
+                         } else
+                                 lsz = uc;
+                         while (sz &&
+                             strchr(" %&()*+-./0123456789:<=>", *seq)) {
+                                 seq++;
+                                 sz--;
+                         }
+                         if (sz && strchr("cifMmnPpuv", *seq)) {
+                                 seq++;
+                                 sz--;
+                         }
+                         if (sz == 0)
+                                 uc = -1;
+                         else if (*seq == '\\') {
+                                 seq++;
+                                 esc = mandoc_escape(&seq, &cp, &sz);
+                                 switch (esc) {
+                                 case ESCAPE_UNICODE:
+                                         uc = mchars_num2uc(cp + 1, sz - 1);
+                                         break;
+                                 case ESCAPE_NUMBERED:
+                                         uc = mchars_num2char(cp, sz);
+                                         break;
+                                 case ESCAPE_SPECIAL:
+                                         uc = mchars_spec2cp(cp, sz);
+                                         break;
+                                 default:
+                                         uc = -1;
+                                         break;
+                                 }
+                         } else
+                                 uc = *seq;
+                         if (uc < 0x20 || (uc > 0x7E && uc < 0xA0))
+                                 uc = '_';
+                         if (p->enc == TERMENC_ASCII) {
+                                 cp = ascii_uc2str(uc);
+                                 csz = term_strlen(p, cp);
+                                 ssz = strlen(cp);
+                         } else
+                                 csz = (*p->width)(p, uc);
+                         while (lsz >= csz) {
+                                 if (p->enc == TERMENC_ASCII)
+                                         encode(p, cp, ssz);
+                                 else
+                                         encode1(p, uc);
+                                 lsz -= csz;
+                         }
+                         continue;
+                 case ESCAPE_SKIPCHAR:
+                         p->flags |= TERMP_BACKAFTER;
+                         continue;
+                 case ESCAPE_OVERSTRIKE:
+                         cp = seq + sz;
+                         while (seq < cp) {
+                                 if (*seq == '\\') {
+                                         mandoc_escape(&seq, NULL, NULL);
+                                         continue;
+                                 }
+                                 encode1(p, *seq++);
+                                 if (seq < cp) {
+                                         if (p->flags & TERMP_BACKBEFORE)
+                                                 p->flags |= TERMP_BACKAFTER;
+                                         else
+                                                 p->flags |= TERMP_BACKBEFORE;
+                                 }
+                         }
+                         /* Trim trailing backspace/blank pair. */
+                         if (p->col > 2 &&
+                             (p->buf[p->col - 1] == ' ' ||
+                              p->buf[p->col - 1] == '\t'))
+                                 p->col -= 2;
+                         continue;
                  default:
-                         do_reserved(p, wp, 1);
+                         continue;
-                         *word = wp;
-                         return;
                  }
-         } else if ('f' == *wp) {
-                 if (0 == *(++wp)) {
-                         *word = wp;
-                         return;
-                 }
-                 switch (*wp) {
+                 /*
-                 case ('B'):
+                  * Common handling for Unicode and numbered
-                         p->bold++;
+                  * character escape sequences.
-                         break;
+                  */
-                 case ('I'):
-                         p->under++;
+                 if (p->enc == TERMENC_ASCII) {
-                         break;
+                         cp = ascii_uc2str(uc);
-                 case ('P'):
+                         encode(p, cp, strlen(cp));
-                         /* FALLTHROUGH */
+                 } else {
-                 case ('R'):
+                         if ((uc < 0x20 && uc != 0x09) ||
-                         p->bold = p->under = 0;
+                             (uc > 0x7E && uc < 0xA0))
-                         break;
+                                 uc = 0xFFFD;
-                 default:
+                         encode1(p, uc);
-                         break;
                  }
+         }
+         p->flags &= ~TERMP_NBRWORD;
+ }
-                 *word = wp;
+ static void
-                 return;
+ adjbuf(struct termp *p, size_t sz)
+ {
-         } else if ('[' != *wp) {
+         if (0 == p->maxcols)
-                 do_special(p, wp, 1);
+                 p->maxcols = 1024;
-                 *word = wp;
+         while (sz >= p->maxcols)
+                 p->maxcols <<= 2;
+         p->buf = mandoc_reallocarray(p->buf, p->maxcols, sizeof(int));
+ }
+ static void
+ bufferc(struct termp *p, char c)
+ {
+         if (p->flags & TERMP_NOBUF) {
+                 (*p->letter)(p, c);
                  return;
          }
+         if (p->col + 1 >= p->maxcols)
+                 adjbuf(p, p->col + 1);
+         p->buf[p->col++] = c;
+ }
-         wp++;
+ /*
-         for (j = 0; *wp && ']' != *wp; wp++, j++)
+  * See encode().
-                 /* Loop... */ ;
+  * Do this for a single (probably Unicode) value.
+  * Does not check for non-decorated glyphs.
+  */
+ static void
+ encode1(struct termp *p, int c)
+ {
+         enum termfont     f;
-         if (0 == *wp) {
+         if (p->flags & TERMP_NOBUF) {
-                 *word = wp;
+                 (*p->letter)(p, c);
                  return;
          }
-         if (type)
+         if (p->col + 7 >= p->maxcols)
-                 do_special(p, wp - j, (size_t)j);
+                 adjbuf(p, p->col + 7);
-         else
-                 do_reserved(p, wp - j, (size_t)j);
-         *word = wp;
- }
+         f = (c == ASCII_HYPH || c > 127 || isgraph(c)) ?
+             p->fontq[p->fonti] : TERMFONT_NONE;
- /*
+         if (p->flags & TERMP_BACKBEFORE) {
-  * Handle pwords, partial words, which may be either a single word or a
+                 if (p->buf[p->col - 1] == ' ' || p->buf[p->col - 1] == '\t')
-  * phrase that cannot be broken down (such as a literal string).  This
+                         p->col--;
-  * handles word styling.
+                 else
-  */
+                         p->buf[p->col++] = 8;
- void
+                 p->flags &= ~TERMP_BACKBEFORE;
- term_word(struct termp *p, const char *word)
+         }
+         if (TERMFONT_UNDER == f || TERMFONT_BI == f) {
+                 p->buf[p->col++] = '_';
+                 p->buf[p->col++] = 8;
+         }
+         if (TERMFONT_BOLD == f || TERMFONT_BI == f) {
+                 if (ASCII_HYPH == c)
+                         p->buf[p->col++] = '-';
+                 else
+                         p->buf[p->col++] = c;
+                 p->buf[p->col++] = 8;
+         }
+         p->buf[p->col++] = c;
+         if (p->flags & TERMP_BACKAFTER) {
+                 p->flags |= TERMP_BACKBEFORE;
+                 p->flags &= ~TERMP_BACKAFTER;
+         }
+ }
+ static void
+ encode(struct termp *p, const char *word, size_t sz)
  {
-         const char       *sv;
+         size_t            i;
-         sv = word;
+         if (p->flags & TERMP_NOBUF) {
+                 for (i = 0; i < sz; i++)
+                         (*p->letter)(p, word[i]);
+                 return;
+         }
-         if (word[0] && 0 == word[1])
+         if (p->col + 2 + (sz * 5) >= p->maxcols)
-                 switch (word[0]) {
+                 adjbuf(p, p->col + 2 + (sz * 5));
-                 case('.'):
-                         /* FALLTHROUGH */
-                 case(','):
-                         /* FALLTHROUGH */
-                 case(';'):
-                         /* FALLTHROUGH */
-                 case(':'):
-                         /* FALLTHROUGH */
-                 case('?'):
-                         /* FALLTHROUGH */
-                 case('!'):
-                         /* FALLTHROUGH */
-                 case(')'):
-                         /* FALLTHROUGH */
-                 case(']'):
-                         /* FALLTHROUGH */
-                 case('}'):
-                         if ( ! (TERMP_IGNDELIM & p->flags))
-                                 p->flags |= TERMP_NOSPACE;
-                         break;
-                 default:
-                         break;
-                 }
-         if ( ! (TERMP_NOSPACE & p->flags))
+         for (i = 0; i < sz; i++) {
-                 buffer(p, ' ');
+                 if (ASCII_HYPH == word[i] ||
+                     isgraph((unsigned char)word[i]))
+                         encode1(p, word[i]);
+                 else {
+                         p->buf[p->col++] = word[i];
-         if ( ! (p->flags & TERMP_NONOSPACE))
+                         /*
-                 p->flags &= ~TERMP_NOSPACE;
+                          * Postpone the effect of \z while handling
+                          * an overstrike sequence from ascii_uc2str().
+                          */
-         for ( ; *word; word++)
+                         if (word[i] == '\b' &&
-                 if ('\\' != *word)
+                             (p->flags & TERMP_BACKBEFORE)) {
-                         encode(p, *word);
+                                 p->flags &= ~TERMP_BACKBEFORE;
-                 else
+                                 p->flags |= TERMP_BACKAFTER;
-                         do_escaped(p, &word);
+                         }
+                 }
+         }
+ }
-         if (sv[0] && 0 == sv[1])
+ void
-                 switch (sv[0]) {
+ term_setwidth(struct termp *p, const char *wstr)
-                 case('('):
+ {
-                         /* FALLTHROUGH */
+         struct roffsu    su;
-                 case('['):
+         int              iop, width;
-                         /* FALLTHROUGH */
-                 case('{'):
+         iop = 0;
-                         p->flags |= TERMP_NOSPACE;
+         width = 0;
+         if (NULL != wstr) {
+                 switch (*wstr) {
+                 case '+':
+                         iop = 1;
+                         wstr++;
                          break;
+                 case '-':
+                         iop = -1;
+                         wstr++;
+                         break;
                  default:
                          break;
                  }
+                 if (a2roffsu(wstr, &su, SCALE_MAX))
+                         width = term_hspan(p, &su);
+                 else
+                         iop = 0;
+         }
+         (*p->setwidth)(p, iop, width);
  }
+ size_t
- /*
+ term_len(const struct termp *p, size_t sz)
-  * Insert a single character into the line-buffer.  If the buffer's
-  * space is exceeded, then allocate more space by doubling the buffer
-  * size.
-  */
- static void
- buffer(struct termp *p, char c)
  {
-         size_t           s;
-         if (p->col + 1 >= p->maxcols) {
+         return (*p->width)(p, ' ') * sz;
-                 if (0 == p->maxcols)
-                         p->maxcols = 256;
-                 s = p->maxcols * 2;
-                 p->buf = realloc(p->buf, s);
-                 if (NULL == p->buf)
-                         err(1, "realloc"); /* FIXME: shouldn't be here! */
-                 p->maxcols = s;
-         }
-         p->buf[(int)(p->col)++] = c;
  }
+ static size_t
- static void
+ cond_width(const struct termp *p, int c, int *skip)
- encode(struct termp *p, char c)
  {
-         if (' ' != c) {
+         if (*skip) {
-                 if (p->under) {
+                 (*skip) = 0;
-                         buffer(p, '_');
+                 return 0;
-                         buffer(p, 8);
+         } else
-                 }
+                 return (*p->width)(p, c);
-                 if (p->bold) {
-                         buffer(p, c);
-                         buffer(p, 8);
-                 }
-         }
-         buffer(p, c);
  }
  size_t
- term_vspan(const struct roffsu *su)
+ term_strlen(const struct termp *p, const char *cp)
  {
-         double           r;
+         size_t           sz, rsz, i;
+         int              ssz, skip, uc;
+         const char      *seq, *rhs;
+         enum mandoc_esc  esc;
+         static const char rej[] = { '\\', ASCII_NBRSP, ASCII_HYPH,
+                         ASCII_BREAK, '\0' };
-         switch (su->unit) {
+         /*
-         case (SCALE_CM):
+          * Account for escaped sequences within string length
-                 r = su->scale * 2;
+          * calculations.  This follows the logic in term_word() as we
-                 break;
+          * must calculate the width of produced strings.
-         case (SCALE_IN):
+          */
-                 r = su->scale * 6;
-                 break;
+         sz = 0;
-         case (SCALE_PC):
+         skip = 0;
-                 r = su->scale;
+         while ('\0' != *cp) {
-                 break;
+                 rsz = strcspn(cp, rej);
-         case (SCALE_PT):
+                 for (i = 0; i < rsz; i++)
-                 r = su->scale / 8;
+                         sz += cond_width(p, *cp++, &skip);
-                 break;
-         case (SCALE_MM):
+                 switch (*cp) {
-                 r = su->scale / 1000;
+                 case '\\':
-                 break;
+                         cp++;
-         case (SCALE_VS):
+                         esc = mandoc_escape(&cp, &seq, &ssz);
-                 r = su->scale;
+                         if (ESCAPE_ERROR == esc)
-                 break;
+                                 continue;
-         default:
-                 r = su->scale - 1;
+                         rhs = NULL;
-                 break;
+                         switch (esc) {
+                         case ESCAPE_UNICODE:
+                                 uc = mchars_num2uc(seq + 1, ssz - 1);
+                                 break;
+                         case ESCAPE_NUMBERED:
+                                 uc = mchars_num2char(seq, ssz);
+                                 if (uc < 0)
+                                         continue;
+                                 break;
+                         case ESCAPE_SPECIAL:
+                                 if (p->enc == TERMENC_ASCII) {
+                                         rhs = mchars_spec2str(seq, ssz, &rsz);
+                                         if (rhs != NULL)
+                                                 break;
+                                 } else {
+                                         uc = mchars_spec2cp(seq, ssz);
+                                         if (uc > 0)
+                                                 sz += cond_width(p, uc, &skip);
+                                 }
+                                 continue;
+                         case ESCAPE_SKIPCHAR:
+                                 skip = 1;
+                                 continue;
+                         case ESCAPE_OVERSTRIKE:
+                                 rsz = 0;
+                                 rhs = seq + ssz;
+                                 while (seq < rhs) {
+                                         if (*seq == '\\') {
+                                                 mandoc_escape(&seq, NULL, NULL);
+                                                 continue;
+                                         }
+                                         i = (*p->width)(p, *seq++);
+                                         if (rsz < i)
+                                                 rsz = i;
+                                 }
+                                 sz += rsz;
+                                 continue;
+                         default:
+                                 continue;
+                         }
+                         /*
+                          * Common handling for Unicode and numbered
+                          * character escape sequences.
+                          */
+                         if (rhs == NULL) {
+                                 if (p->enc == TERMENC_ASCII) {
+                                         rhs = ascii_uc2str(uc);
+                                         rsz = strlen(rhs);
+                                 } else {
+                                         if ((uc < 0x20 && uc != 0x09) ||
+                                             (uc > 0x7E && uc < 0xA0))
+                                                 uc = 0xFFFD;
+                                         sz += cond_width(p, uc, &skip);
+                                         continue;
+                                 }
+                         }
+                         if (skip) {
+                                 skip = 0;
+                                 break;
+                         }
+                         /*
+                          * Common handling for all escape sequences
+                          * printing more than one character.
+                          */
+                         for (i = 0; i < rsz; i++)
+                                 sz += (*p->width)(p, *rhs++);
+                         break;
+                 case ASCII_NBRSP:
+                         sz += cond_width(p, ' ', &skip);
+                         cp++;
+                         break;
+                 case ASCII_HYPH:
+                         sz += cond_width(p, '-', &skip);
+                         cp++;
+                         break;
+                 default:
+                         break;
+                 }
          }
-         if (r < 0.0)
+         return sz;
-                 r = 0.0;
-         return(/* LINTED */(size_t)
-                         r);
  }
+ int
- size_t
+ term_vspan(const struct termp *p, const struct roffsu *su)
- term_hspan(const struct roffsu *su)
  {
          double           r;
+         int              ri;
-         /* XXX: CM, IN, and PT are approximations. */
          switch (su->unit) {
-         case (SCALE_CM):
+         case SCALE_BU:
-                 r = 4 * su->scale;
+                 r = su->scale / 40.0;
                  break;
-         case (SCALE_IN):
+         case SCALE_CM:
-                 /* XXX: this is an approximation. */
+                 r = su->scale * 6.0 / 2.54;
-                 r = 10 * su->scale;
                  break;
-         case (SCALE_PC):
+         case SCALE_FS:
-                 r = (10 * su->scale) / 6;
+                 r = su->scale * 65536.0 / 40.0;
                  break;
-         case (SCALE_PT):
+         case SCALE_IN:
-                 r = (10 * su->scale) / 72;
+                 r = su->scale * 6.0;
                  break;
-         case (SCALE_MM):
+         case SCALE_MM:
-                 r = su->scale / 1000; /* FIXME: double-check. */
+                 r = su->scale * 0.006;
                  break;
-         case (SCALE_VS):
+         case SCALE_PC:
-                 r = su->scale * 2 - 1; /* FIXME: double-check. */
+                 r = su->scale;
                  break;
-         default:
+         case SCALE_PT:
+                 r = su->scale / 12.0;
+                 break;
+         case SCALE_EN:
+         case SCALE_EM:
+                 r = su->scale * 0.6;
+                 break;
+         case SCALE_VS:
                  r = su->scale;
                  break;
+         default:
+                 abort();
          }
+         ri = r > 0.0 ? r + 0.4995 : r - 0.4995;
-         if (r < 0.0)
+         return ri < 66 ? ri : 1;
-                 r = 0.0;
-         return((size_t)/* LINTED */
-                         r);
  }
+ /*
+  * Convert a scaling width to basic units, rounding down.
+  */
+ int
+ term_hspan(const struct termp *p, const struct roffsu *su)
+ {
+         return (*p->hspan)(p, su);
+ }

CVSweb