version 1.113, 2009/10/26 17:05:44 |
version 1.133, 2010/05/12 16:01:01 |
|
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
*/ |
*/ |
|
#ifdef HAVE_CONFIG_H |
|
#include "config.h" |
|
#endif |
|
|
|
#include <sys/types.h> |
|
|
#include <assert.h> |
#include <assert.h> |
#include <err.h> |
#include <ctype.h> |
#include <stdio.h> |
#include <stdio.h> |
#include <stdlib.h> |
#include <stdlib.h> |
#include <string.h> |
#include <string.h> |
|
|
#include "mdoc.h" |
#include "mdoc.h" |
#include "main.h" |
#include "main.h" |
|
|
/* FIXME: accommodate non-breaking, non-collapsing white-space. */ |
|
/* FIXME: accommodate non-breaking, collapsing white-space. */ |
|
|
|
static struct termp *term_alloc(enum termenc); |
static struct termp *term_alloc(enum termenc); |
static void term_free(struct termp *); |
static void term_free(struct termp *); |
|
static void spec(struct termp *, const char *, size_t); |
|
static void res(struct termp *, const char *, size_t); |
|
static void buffera(struct termp *, const char *, size_t); |
|
static void bufferc(struct termp *, char); |
|
static void adjbuf(struct termp *p, size_t); |
|
static void encode(struct termp *, const char *, size_t); |
|
|
static void do_escaped(struct termp *, const char **); |
|
static void do_special(struct termp *, |
|
const char *, size_t); |
|
static void do_reserved(struct termp *, |
|
const char *, size_t); |
|
static void buffer(struct termp *, char); |
|
static void encode(struct termp *, char); |
|
|
|
|
|
void * |
void * |
ascii_alloc(void) |
ascii_alloc(void) |
{ |
{ |
Line 77 term_alloc(enum termenc enc) |
|
Line 78 term_alloc(enum termenc enc) |
|
{ |
{ |
struct termp *p; |
struct termp *p; |
|
|
if (NULL == (p = malloc(sizeof(struct termp)))) |
p = calloc(1, sizeof(struct termp)); |
return(NULL); |
if (NULL == p) { |
bzero(p, sizeof(struct termp)); |
perror(NULL); |
p->maxrmargin = 78; |
exit(EXIT_FAILURE); |
|
} |
p->enc = enc; |
p->enc = enc; |
return(p); |
return(p); |
} |
} |
Line 90 term_alloc(enum termenc enc) |
|
Line 92 term_alloc(enum termenc enc) |
|
* Flush a line of text. A "line" is loosely defined as being something |
* Flush a line of text. A "line" is loosely defined as being something |
* that should be followed by a newline, regardless of whether it's |
* that should be followed by a newline, regardless of whether it's |
* broken apart by newlines getting there. A line can also be a |
* broken apart by newlines getting there. A line can also be a |
* fragment of a columnar list. |
* fragment of a columnar list (`Bl -tag' or `Bl -column'), which does |
|
* not have a trailing newline. |
* |
* |
* Specifically, a line is whatever's in p->buf of length p->col, which |
* The following flags may be specified: |
* is zeroed after this function returns. |
|
* |
* |
* The usage of termp:flags is as follows: |
|
* |
|
* - TERMP_NOLPAD: when beginning to write the line, don't left-pad the |
* - TERMP_NOLPAD: when beginning to write the line, don't left-pad the |
* offset value. This is useful when doing columnar lists where the |
* offset value. This is useful when doing columnar lists where the |
* prior column has right-padded. |
* prior column has right-padded. |
Line 120 term_alloc(enum termenc enc) |
|
Line 120 term_alloc(enum termenc enc) |
|
* If TERMP_NOBREAK is specified and the line overruns the right |
* If TERMP_NOBREAK is specified and the line overruns the right |
* margin, it will break and pad-right to the right margin after |
* margin, it will break and pad-right to the right margin after |
* writing. If maxrmargin is violated, it will break and continue |
* writing. If maxrmargin is violated, it will break and continue |
* writing from the right-margin, which will lead to the above |
* writing from the right-margin, which will lead to the above scenario |
* scenario upon exit. |
* upon exit. Otherwise, the line will break at the right margin. |
* |
|
* Otherwise, the line will break at the right margin. Extremely long |
|
* lines will cause the system to emit a warning (TODO: hyphenate, if |
|
* possible). |
|
*/ |
*/ |
void |
void |
term_flushln(struct termp *p) |
term_flushln(struct termp *p) |
{ |
{ |
int i, j; |
int i; /* current input position in p->buf */ |
size_t vbl, vsz, vis, maxvis, mmax, bp; |
size_t vis; /* current visual position on output */ |
static int overstep = 0; |
size_t vbl; /* number of blanks to prepend to output */ |
|
size_t vsz; /* visual characters to write to output */ |
|
size_t bp; /* visual right border position */ |
|
int j; /* temporary loop index */ |
|
size_t maxvis, mmax; |
|
|
/* |
/* |
* First, establish the maximum columns of "visible" content. |
* First, establish the maximum columns of "visible" content. |
* This is usually the difference between the right-margin and |
* This is usually the difference between the right-margin and |
* an indentation, but can be, for tagged lists or columns, a |
* an indentation, but can be, for tagged lists or columns, a |
* small set of values. |
* small set of values. |
*/ |
*/ |
|
|
assert(p->offset < p->rmargin); |
assert(p->offset < p->rmargin); |
assert((int)(p->rmargin - p->offset) - overstep > 0); |
|
|
|
maxvis = /* LINTED */ |
maxvis = (int)(p->rmargin - p->offset) - p->overstep < 0 ? |
p->rmargin - p->offset - overstep; |
/* LINTED */ |
mmax = /* LINTED */ |
0 : p->rmargin - p->offset - p->overstep; |
p->maxrmargin - p->offset - overstep; |
mmax = (int)(p->maxrmargin - p->offset) - p->overstep < 0 ? |
|
/* LINTED */ |
|
0 : p->maxrmargin - p->offset - p->overstep; |
|
|
bp = TERMP_NOBREAK & p->flags ? mmax : maxvis; |
bp = TERMP_NOBREAK & p->flags ? mmax : maxvis; |
|
|
|
/* |
|
* FIXME: if bp is zero, we still output the first word before |
|
* breaking the line. |
|
*/ |
|
|
vis = 0; |
vis = 0; |
|
|
/* |
/* |
Line 175 term_flushln(struct termp *p) |
|
Line 182 term_flushln(struct termp *p) |
|
for (j = i, vsz = 0; j < (int)p->col; j++) { |
for (j = i, vsz = 0; j < (int)p->col; j++) { |
if (j && ' ' == p->buf[j]) |
if (j && ' ' == p->buf[j]) |
break; |
break; |
else if (8 == p->buf[j]) |
if (8 == p->buf[j]) |
vsz--; |
vsz--; |
else |
else |
vsz++; |
vsz++; |
Line 186 term_flushln(struct termp *p) |
|
Line 193 term_flushln(struct termp *p) |
|
* beginning of a line, one between words -- but do not |
* beginning of a line, one between words -- but do not |
* actually write them yet. |
* actually write them yet. |
*/ |
*/ |
|
|
vbl = (size_t)(0 == vis ? 0 : 1); |
vbl = (size_t)(0 == vis ? 0 : 1); |
|
|
/* |
/* |
* Find out whether we would exceed the right margin. |
* Find out whether we would exceed the right margin. |
* If so, break to the next line. (TODO: hyphenate) |
* If so, break to the next line. Otherwise, write the chosen |
* Otherwise, write the chosen number of blanks now. |
* number of blanks. |
*/ |
*/ |
|
|
if (vis && vis + vbl + vsz > bp) { |
if (vis && vis + vbl + vsz > bp) { |
putchar('\n'); |
putchar('\n'); |
if (TERMP_NOBREAK & p->flags) { |
if (TERMP_NOBREAK & p->flags) { |
Line 204 term_flushln(struct termp *p) |
|
Line 213 term_flushln(struct termp *p) |
|
putchar(' '); |
putchar(' '); |
vis = 0; |
vis = 0; |
} |
} |
/* Remove the overstep width. */ |
|
|
/* Remove the p->overstep width. */ |
|
|
bp += (int)/* LINTED */ |
bp += (int)/* LINTED */ |
overstep; |
p->overstep; |
overstep = 0; |
p->overstep = 0; |
} else { |
} else { |
for (j = 0; j < (int)vbl; j++) |
for (j = 0; j < (int)vbl; j++) |
putchar(' '); |
putchar(' '); |
vis += vbl; |
vis += vbl; |
} |
} |
|
|
/* |
/* Write out the [remaining] word. */ |
* Finally, write out the word. |
for ( ; i < (int)p->col; i++) |
*/ |
|
for ( ; i < (int)p->col; i++) { |
|
if (' ' == p->buf[i]) |
if (' ' == p->buf[i]) |
break; |
break; |
putchar(p->buf[i]); |
else if (31 == p->buf[i]) |
} |
putchar(' '); |
|
else |
|
putchar(p->buf[i]); |
|
|
vis += vsz; |
vis += vsz; |
} |
} |
|
|
p->col = 0; |
p->col = 0; |
overstep = 0; |
p->overstep = 0; |
|
|
if ( ! (TERMP_NOBREAK & p->flags)) { |
if ( ! (TERMP_NOBREAK & p->flags)) { |
putchar('\n'); |
putchar('\n'); |
Line 235 term_flushln(struct termp *p) |
|
Line 247 term_flushln(struct termp *p) |
|
|
|
if (TERMP_HANG & p->flags) { |
if (TERMP_HANG & p->flags) { |
/* We need one blank after the tag. */ |
/* We need one blank after the tag. */ |
overstep = /* LINTED */ |
p->overstep = /* LINTED */ |
vis - maxvis + 1; |
vis - maxvis + 1; |
|
|
/* |
/* |
Line 248 term_flushln(struct termp *p) |
|
Line 260 term_flushln(struct termp *p) |
|
* move it one step LEFT and flag the rest of the line |
* move it one step LEFT and flag the rest of the line |
* to be longer. |
* to be longer. |
*/ |
*/ |
if (overstep >= -1) { |
if (p->overstep >= -1) { |
assert((int)maxvis + overstep >= 0); |
assert((int)maxvis + p->overstep >= 0); |
/* LINTED */ |
/* LINTED */ |
maxvis += overstep; |
maxvis += p->overstep; |
} else |
} else |
overstep = 0; |
p->overstep = 0; |
|
|
} else if (TERMP_DANGLE & p->flags) |
} else if (TERMP_DANGLE & p->flags) |
return; |
return; |
Line 306 term_vspace(struct termp *p) |
|
Line 318 term_vspace(struct termp *p) |
|
|
|
|
|
static void |
static void |
do_special(struct termp *p, const char *word, size_t len) |
spec(struct termp *p, const char *word, size_t len) |
{ |
{ |
const char *rhs; |
const char *rhs; |
size_t sz; |
size_t sz; |
int i; |
|
|
|
rhs = chars_a2ascii(p->symtab, word, len, &sz); |
rhs = chars_a2ascii(p->symtab, word, len, &sz); |
|
if (rhs) |
if (NULL == rhs) { |
encode(p, rhs, sz); |
#if 0 |
|
fputs("Unknown special character: ", stderr); |
|
for (i = 0; i < (int)len; i++) |
|
fputc(word[i], stderr); |
|
fputc('\n', stderr); |
|
#endif |
|
return; |
|
} |
|
for (i = 0; i < (int)sz; i++) |
|
encode(p, rhs[i]); |
|
} |
} |
|
|
|
|
static void |
static void |
do_reserved(struct termp *p, const char *word, size_t len) |
res(struct termp *p, const char *word, size_t len) |
{ |
{ |
const char *rhs; |
const char *rhs; |
size_t sz; |
size_t sz; |
int i; |
|
|
|
rhs = chars_a2res(p->symtab, word, len, &sz); |
rhs = chars_a2res(p->symtab, word, len, &sz); |
|
if (rhs) |
|
encode(p, rhs, sz); |
|
} |
|
|
if (NULL == rhs) { |
|
#if 0 |
void |
fputs("Unknown reserved word: ", stderr); |
term_fontlast(struct termp *p) |
for (i = 0; i < (int)len; i++) |
{ |
fputc(word[i], stderr); |
enum termfont f; |
fputc('\n', stderr); |
|
#endif |
f = p->fontl; |
return; |
p->fontl = p->fontq[p->fonti]; |
} |
p->fontq[p->fonti] = f; |
for (i = 0; i < (int)sz; i++) |
|
encode(p, rhs[i]); |
|
} |
} |
|
|
|
|
/* |
void |
* Handle an escape sequence: determine its length and pass it to the |
term_fontrepl(struct termp *p, enum termfont f) |
* escape-symbol lookup table. Note that we assume mdoc(3) has validated |
|
* the escape sequence (we assert upon badly-formed escape sequences). |
|
*/ |
|
static void |
|
do_escaped(struct termp *p, const char **word) |
|
{ |
{ |
int j, type; |
|
const char *wp; |
|
|
|
wp = *word; |
p->fontl = p->fontq[p->fonti]; |
type = 1; |
p->fontq[p->fonti] = f; |
|
} |
|
|
if (0 == *(++wp)) { |
|
*word = wp; |
|
return; |
|
} |
|
|
|
if ('(' == *wp) { |
void |
wp++; |
term_fontpush(struct termp *p, enum termfont f) |
if (0 == *wp || 0 == *(wp + 1)) { |
{ |
*word = 0 == *wp ? wp : wp + 1; |
|
return; |
|
} |
|
|
|
do_special(p, wp, 2); |
assert(p->fonti + 1 < 10); |
*word = ++wp; |
p->fontl = p->fontq[p->fonti]; |
return; |
p->fontq[++p->fonti] = f; |
|
} |
|
|
} else if ('*' == *wp) { |
|
if (0 == *(++wp)) { |
|
*word = wp; |
|
return; |
|
} |
|
|
|
switch (*wp) { |
const void * |
case ('('): |
term_fontq(struct termp *p) |
wp++; |
{ |
if (0 == *wp || 0 == *(wp + 1)) { |
|
*word = 0 == *wp ? wp : wp + 1; |
|
return; |
|
} |
|
|
|
do_reserved(p, wp, 2); |
return(&p->fontq[p->fonti]); |
*word = ++wp; |
} |
return; |
|
case ('['): |
|
type = 0; |
|
break; |
|
default: |
|
do_reserved(p, wp, 1); |
|
*word = wp; |
|
return; |
|
} |
|
|
|
} else if ('f' == *wp) { |
|
if (0 == *(++wp)) { |
|
*word = wp; |
|
return; |
|
} |
|
|
|
switch (*wp) { |
|
case ('B'): |
|
p->bold++; |
|
break; |
|
case ('I'): |
|
p->under++; |
|
break; |
|
case ('P'): |
|
/* FALLTHROUGH */ |
|
case ('R'): |
|
p->bold = p->under = 0; |
|
break; |
|
default: |
|
break; |
|
} |
|
|
|
*word = wp; |
enum termfont |
return; |
term_fonttop(struct termp *p) |
|
{ |
|
|
} else if ('[' != *wp) { |
return(p->fontq[p->fonti]); |
do_special(p, wp, 1); |
} |
*word = wp; |
|
return; |
|
} |
|
|
|
wp++; |
|
for (j = 0; *wp && ']' != *wp; wp++, j++) |
|
/* Loop... */ ; |
|
|
|
if (0 == *wp) { |
void |
*word = wp; |
term_fontpopq(struct termp *p, const void *key) |
return; |
{ |
} |
|
|
|
if (type) |
while (p->fonti >= 0 && key != &p->fontq[p->fonti]) |
do_special(p, wp - j, (size_t)j); |
p->fonti--; |
else |
assert(p->fonti >= 0); |
do_reserved(p, wp - j, (size_t)j); |
|
*word = wp; |
|
} |
} |
|
|
|
|
|
void |
|
term_fontpop(struct termp *p) |
|
{ |
|
|
|
assert(p->fonti); |
|
p->fonti--; |
|
} |
|
|
|
|
/* |
/* |
* Handle pwords, partial words, which may be either a single word or a |
* Handle pwords, partial words, which may be either a single word or a |
* phrase that cannot be broken down (such as a literal string). This |
* phrase that cannot be broken down (such as a literal string). This |
Line 463 do_escaped(struct termp *p, const char **word) |
|
Line 414 do_escaped(struct termp *p, const char **word) |
|
void |
void |
term_word(struct termp *p, const char *word) |
term_word(struct termp *p, const char *word) |
{ |
{ |
const char *sv; |
const char *sv, *seq; |
|
int sz; |
|
size_t ssz; |
|
enum roffdeco deco; |
|
|
sv = word; |
sv = word; |
|
|
if (word[0] && 0 == word[1]) |
if (word[0] && '\0' == word[1]) |
switch (word[0]) { |
switch (word[0]) { |
case('.'): |
case('.'): |
/* FALLTHROUGH */ |
/* FALLTHROUGH */ |
Line 484 term_word(struct termp *p, const char *word) |
|
Line 438 term_word(struct termp *p, const char *word) |
|
case(')'): |
case(')'): |
/* FALLTHROUGH */ |
/* FALLTHROUGH */ |
case(']'): |
case(']'): |
/* FALLTHROUGH */ |
|
case('}'): |
|
if ( ! (TERMP_IGNDELIM & p->flags)) |
if ( ! (TERMP_IGNDELIM & p->flags)) |
p->flags |= TERMP_NOSPACE; |
p->flags |= TERMP_NOSPACE; |
break; |
break; |
Line 493 term_word(struct termp *p, const char *word) |
|
Line 445 term_word(struct termp *p, const char *word) |
|
break; |
break; |
} |
} |
|
|
if ( ! (TERMP_NOSPACE & p->flags)) |
if ( ! (TERMP_NOSPACE & p->flags)) { |
buffer(p, ' '); |
bufferc(p, ' '); |
|
if (TERMP_SENTENCE & p->flags) |
|
bufferc(p, ' '); |
|
} |
|
|
if ( ! (p->flags & TERMP_NONOSPACE)) |
if ( ! (p->flags & TERMP_NONOSPACE)) |
p->flags &= ~TERMP_NOSPACE; |
p->flags &= ~TERMP_NOSPACE; |
|
|
for ( ; *word; word++) |
p->flags &= ~TERMP_SENTENCE; |
if ('\\' != *word) |
|
encode(p, *word); |
|
else |
|
do_escaped(p, &word); |
|
|
|
|
/* FIXME: use strcspn. */ |
|
|
|
while (*word) { |
|
if ('\\' != *word) { |
|
encode(p, word, 1); |
|
word++; |
|
continue; |
|
} |
|
|
|
seq = ++word; |
|
sz = a2roffdeco(&deco, &seq, &ssz); |
|
|
|
switch (deco) { |
|
case (DECO_RESERVED): |
|
res(p, seq, ssz); |
|
break; |
|
case (DECO_SPECIAL): |
|
spec(p, seq, ssz); |
|
break; |
|
case (DECO_BOLD): |
|
term_fontrepl(p, TERMFONT_BOLD); |
|
break; |
|
case (DECO_ITALIC): |
|
term_fontrepl(p, TERMFONT_UNDER); |
|
break; |
|
case (DECO_ROMAN): |
|
term_fontrepl(p, TERMFONT_NONE); |
|
break; |
|
case (DECO_PREVIOUS): |
|
term_fontlast(p); |
|
break; |
|
default: |
|
break; |
|
} |
|
|
|
word += sz; |
|
if (DECO_NOSPACE == deco && '\0' == *word) |
|
p->flags |= TERMP_NOSPACE; |
|
} |
|
|
|
/* |
|
* Note that we don't process the pipe: the parser sees it as |
|
* punctuation, but we don't in terms of typography. |
|
*/ |
if (sv[0] && 0 == sv[1]) |
if (sv[0] && 0 == sv[1]) |
switch (sv[0]) { |
switch (sv[0]) { |
case('('): |
case('('): |
/* FALLTHROUGH */ |
/* FALLTHROUGH */ |
case('['): |
case('['): |
/* FALLTHROUGH */ |
|
case('{'): |
|
p->flags |= TERMP_NOSPACE; |
p->flags |= TERMP_NOSPACE; |
break; |
break; |
default: |
default: |
Line 520 term_word(struct termp *p, const char *word) |
|
Line 513 term_word(struct termp *p, const char *word) |
|
} |
} |
|
|
|
|
/* |
|
* Insert a single character into the line-buffer. If the buffer's |
|
* space is exceeded, then allocate more space by doubling the buffer |
|
* size. |
|
*/ |
|
static void |
static void |
buffer(struct termp *p, char c) |
adjbuf(struct termp *p, size_t sz) |
{ |
{ |
size_t s; |
|
|
|
if (p->col + 1 >= p->maxcols) { |
if (0 == p->maxcols) |
if (0 == p->maxcols) |
p->maxcols = 1024; |
p->maxcols = 256; |
while (sz >= p->maxcols) |
s = p->maxcols * 2; |
p->maxcols <<= 2; |
p->buf = realloc(p->buf, s); |
|
if (NULL == p->buf) |
p->buf = realloc(p->buf, p->maxcols); |
err(1, "realloc"); /* FIXME: shouldn't be here! */ |
if (NULL == p->buf) { |
p->maxcols = s; |
perror(NULL); |
|
exit(EXIT_FAILURE); |
} |
} |
p->buf[(int)(p->col)++] = c; |
|
} |
} |
|
|
|
|
static void |
static void |
encode(struct termp *p, char c) |
buffera(struct termp *p, const char *word, size_t sz) |
{ |
{ |
|
|
if (' ' != c) { |
if (p->col + sz >= p->maxcols) |
if (p->under) { |
adjbuf(p, p->col + sz); |
buffer(p, '_'); |
|
buffer(p, 8); |
memcpy(&p->buf[(int)p->col], word, sz); |
|
p->col += sz; |
|
} |
|
|
|
|
|
static void |
|
bufferc(struct termp *p, char c) |
|
{ |
|
|
|
if (p->col + 1 >= p->maxcols) |
|
adjbuf(p, p->col + 1); |
|
|
|
p->buf[(int)p->col++] = c; |
|
} |
|
|
|
|
|
static void |
|
encode(struct termp *p, const char *word, size_t sz) |
|
{ |
|
enum termfont f; |
|
int i; |
|
|
|
/* |
|
* Encode and buffer a string of characters. If the current |
|
* font mode is unset, buffer directly, else encode then buffer |
|
* character by character. |
|
*/ |
|
|
|
if (TERMFONT_NONE == (f = term_fonttop(p))) { |
|
buffera(p, word, sz); |
|
return; |
|
} |
|
|
|
for (i = 0; i < (int)sz; i++) { |
|
if ( ! isgraph((u_char)word[i])) { |
|
bufferc(p, word[i]); |
|
continue; |
} |
} |
if (p->bold) { |
|
buffer(p, c); |
if (TERMFONT_UNDER == f) |
buffer(p, 8); |
bufferc(p, '_'); |
} |
else |
|
bufferc(p, word[i]); |
|
|
|
bufferc(p, 8); |
|
bufferc(p, word[i]); |
} |
} |
buffer(p, c); |
|
} |
} |
|
|
|
|