=================================================================== RCS file: /cvs/mandoc/term.c,v retrieving revision 1.191 retrieving revision 1.199 diff -u -p -r1.191 -r1.199 --- mandoc/term.c 2011/05/15 22:29:50 1.191 +++ mandoc/term.c 2011/09/18 21:18:19 1.199 @@ -1,6 +1,6 @@ -/* $Id: term.c,v 1.191 2011/05/15 22:29:50 kristaps Exp $ */ +/* $Id: term.c,v 1.199 2011/09/18 21:18:19 schwarze Exp $ */ /* - * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2010, 2011 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any @@ -36,6 +36,7 @@ static void adjbuf(struct termp *p, int); static void bufferc(struct termp *, char); static void encode(struct termp *, const char *, size_t); +static void encode1(struct termp *, int); void term_free(struct termp *p) @@ -69,18 +70,6 @@ term_end(struct termp *p) (*p->end)(p); } - -struct termp * -term_alloc(enum termenc enc) -{ - struct termp *p; - - p = mandoc_calloc(1, sizeof(struct termp)); - p->enc = enc; - return(p); -} - - /* * Flush a line of text. A "line" is loosely defined as being something * that should be followed by a newline, regardless of whether it's @@ -415,7 +404,7 @@ term_word(struct termp *p, const char *word) { const char *seq, *cp; char c; - int sz; + int sz, uc; size_t ssz; enum mandoc_esc esc; @@ -450,16 +439,33 @@ term_word(struct termp *p, const char *word) if (ESCAPE_ERROR == esc) break; + if (TERMENC_ASCII != p->enc) + switch (esc) { + case (ESCAPE_UNICODE): + uc = mchars_num2uc(seq + 1, sz - 1); + if ('\0' == uc) + break; + encode1(p, uc); + continue; + case (ESCAPE_SPECIAL): + uc = mchars_spec2cp(p->symtab, seq, sz); + if (uc <= 0) + break; + encode1(p, uc); + continue; + default: + break; + } + switch (esc) { + case (ESCAPE_UNICODE): + encode1(p, '?'); + break; case (ESCAPE_NUMBERED): - if ('\0' != (c = mchars_num2char(seq, sz))) + c = mchars_num2char(seq, sz); + if ('\0' != c) encode(p, &c, 1); break; - case (ESCAPE_PREDEF): - cp = mchars_res2str(p->symtab, seq, sz, &ssz); - if (NULL != cp) - encode(p, cp, ssz); - break; case (ESCAPE_SPECIAL): cp = mchars_spec2str(p->symtab, seq, sz, &ssz); if (NULL != cp) @@ -473,6 +479,8 @@ term_word(struct termp *p, const char *word) case (ESCAPE_FONTITALIC): term_fontrepl(p, TERMFONT_UNDER); break; + case (ESCAPE_FONT): + /* FALLTHROUGH */ case (ESCAPE_FONTROMAN): term_fontrepl(p, TERMFONT_NONE); break; @@ -512,7 +520,34 @@ bufferc(struct termp *p, char c) p->buf[p->col++] = c; } +/* + * See encode(). + * Do this for a single (probably unicode) value. + * Does not check for non-decorated glyphs. + */ static void +encode1(struct termp *p, int c) +{ + enum termfont f; + + if (p->col + 4 >= p->maxcols) + adjbuf(p, p->col + 4); + + f = term_fonttop(p); + + if (TERMFONT_NONE == f) { + p->buf[p->col++] = c; + return; + } else if (TERMFONT_UNDER == f) { + p->buf[p->col++] = '_'; + } else + p->buf[p->col++] = c; + + p->buf[p->col++] = 8; + p->buf[p->col++] = c; +} + +static void encode(struct termp *p, const char *word, size_t sz) { enum termfont f; @@ -541,13 +576,16 @@ encode(struct termp *p, const char *word, size_t sz) adjbuf(p, p->col + 1 + (len * 3)); for (i = 0; i < len; i++) { - if ( ! isgraph((unsigned char)word[i])) { + if (ASCII_HYPH != word[i] && + ! isgraph((unsigned char)word[i])) { p->buf[p->col++] = word[i]; continue; } if (TERMFONT_UNDER == f) p->buf[p->col++] = '_'; + else if (ASCII_HYPH == word[i]) + p->buf[p->col++] = '-'; else p->buf[p->col++] = word[i]; @@ -570,6 +608,7 @@ term_strlen(const struct termp *p, const char *cp) size_t sz, rsz, i; int ssz, c; const char *seq, *rhs; + enum mandoc_esc esc; static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' }; /* @@ -584,21 +623,44 @@ term_strlen(const struct termp *p, const char *cp) for (i = 0; i < rsz; i++) sz += (*p->width)(p, *cp++); + c = 0; switch (*cp) { case ('\\'): cp++; - rhs = NULL; - switch (mandoc_escape(&cp, &seq, &ssz)) { - case (ESCAPE_ERROR): + esc = mandoc_escape(&cp, &seq, &ssz); + if (ESCAPE_ERROR == esc) return(sz); + + if (TERMENC_ASCII != p->enc) + switch (esc) { + case (ESCAPE_UNICODE): + c = mchars_num2uc + (seq + 1, ssz - 1); + if ('\0' == c) + break; + sz += (*p->width)(p, c); + continue; + case (ESCAPE_SPECIAL): + c = mchars_spec2cp + (p->symtab, seq, ssz); + if (c <= 0) + break; + sz += (*p->width)(p, c); + continue; + default: + break; + } + + rhs = NULL; + + switch (esc) { + case (ESCAPE_UNICODE): + sz += (*p->width)(p, '?'); + break; case (ESCAPE_NUMBERED): c = mchars_num2char(seq, ssz); if ('\0' != c) sz += (*p->width)(p, c); - break; - case (ESCAPE_PREDEF): - rhs = mchars_res2str - (p->symtab, seq, ssz, &rsz); break; case (ESCAPE_SPECIAL): rhs = mchars_spec2str