version 1.9, 2014/10/25 01:03:52 |
version 1.12, 2014/11/14 04:24:04 |
|
|
#include "libmandoc.h" |
#include "libmandoc.h" |
|
|
int |
int |
preconv_encode(struct buf *ib, struct buf *ob, int *filenc) |
preconv_encode(struct buf *ib, size_t *ii, struct buf *ob, size_t *oi, |
|
int *filenc) |
{ |
{ |
int state, be; |
|
unsigned int accum; |
|
size_t i; |
size_t i; |
|
int state; |
|
unsigned int accum; |
unsigned char cu; |
unsigned char cu; |
const long one = 1L; |
|
|
|
if ( ! (*filenc & MPARSE_UTF8)) |
if ( ! (*filenc & MPARSE_UTF8)) |
goto latin; |
goto latin; |
|
|
state = 0; |
state = 0; |
accum = 0U; |
accum = 0U; |
be = 0; |
|
|
|
/* Quick test for big-endian value. */ |
for (i = *ii; i < ib->sz; i++) { |
|
|
if ( ! (*((const char *)(&one)))) |
|
be = 1; |
|
|
|
for (i = ib->offs; i < ib->sz; i++) { |
|
cu = ib->buf[i]; |
cu = ib->buf[i]; |
if (state) { |
if (state) { |
if ( ! (cu & 128) || (cu & 64)) { |
if ( ! (cu & 128) || (cu & 64)) { |
Line 65 preconv_encode(struct buf *ib, struct buf *ob, int *fi |
|
Line 59 preconv_encode(struct buf *ib, struct buf *ob, int *fi |
|
if (state) |
if (state) |
continue; |
continue; |
|
|
/* |
|
* Accum is held in little-endian order as |
|
* stipulated by the UTF-8 sequence coding. We |
|
* need to convert to a native big-endian if our |
|
* architecture requires it. |
|
*/ |
|
|
|
if (be) |
|
accum = (accum >> 24) | |
|
((accum << 8) & 0x00FF0000) | |
|
((accum >> 8) & 0x0000FF00) | |
|
(accum << 24); |
|
|
|
if (accum < 0x80) |
if (accum < 0x80) |
ob->buf[ob->offs++] = accum; |
ob->buf[(*oi)++] = accum; |
else |
else |
ob->offs += snprintf(ob->buf + ob->offs, |
*oi += snprintf(ob->buf + *oi, |
11, "\\[u%.4X]", accum); |
11, "\\[u%.4X]", accum); |
ib->offs = i + 1; |
*ii = i + 1; |
*filenc &= ~MPARSE_LATIN1; |
*filenc &= ~MPARSE_LATIN1; |
return(1); |
return(1); |
} else { |
} else { |
Line 92 preconv_encode(struct buf *ib, struct buf *ob, int *fi |
|
Line 73 preconv_encode(struct buf *ib, struct buf *ob, int *fi |
|
* UTF-8 bitmask, calculate the expected UTF-8 |
* UTF-8 bitmask, calculate the expected UTF-8 |
* state from it. |
* state from it. |
*/ |
*/ |
for (state = 0; state < 7; state++) |
for (state = 0; state < 7; state++) |
if ( ! (cu & (1 << (7 - state)))) |
if ( ! (cu & (1 << (7 - state)))) |
break; |
break; |
|
|
|
|
if ( ! (*filenc & MPARSE_LATIN1)) |
if ( ! (*filenc & MPARSE_LATIN1)) |
return(0); |
return(0); |
|
|
ob->offs += snprintf(ob->buf + ob->offs, 11, |
*oi += snprintf(ob->buf + *oi, 11, |
"\\[u%.4X]", (unsigned char)ib->buf[ib->offs++]); |
"\\[u%.4X]", (unsigned char)ib->buf[(*ii)++]); |
|
|
*filenc &= ~MPARSE_UTF8; |
*filenc &= ~MPARSE_UTF8; |
return(1); |
return(1); |
} |
} |
|
|
int |
int |
preconv_cue(const struct buf *b) |
preconv_cue(const struct buf *b, size_t offset) |
{ |
{ |
const char *ln, *eoln, *eoph; |
const char *ln, *eoln, *eoph; |
size_t sz, phsz; |
size_t sz, phsz; |
|
|
ln = b->buf + b->offs; |
ln = b->buf + offset; |
sz = b->sz - b->offs; |
sz = b->sz - offset; |
|
|
/* Look for the end-of-line. */ |
/* Look for the end-of-line. */ |
|
|
Line 157 preconv_cue(const struct buf *b) |
|
Line 138 preconv_cue(const struct buf *b) |
|
|
|
/* Check if we have the correct header/trailer. */ |
/* Check if we have the correct header/trailer. */ |
|
|
if ((sz = (size_t)(eoln - ln)) < 10 || |
if ((sz = (size_t)(eoln - ln)) < 10 || |
memcmp(ln, ".\\\" -*-", 7) || |
memcmp(ln, ".\\\" -*-", 7) || memcmp(eoln - 3, "-*-", 3)) |
memcmp(eoln - 3, "-*-", 3)) |
|
return(MPARSE_UTF8 | MPARSE_LATIN1); |
return(MPARSE_UTF8 | MPARSE_LATIN1); |
|
|
/* Move after the header and adjust for the trailer. */ |
/* Move after the header and adjust for the trailer. */ |
Line 189 preconv_cue(const struct buf *b) |
|
Line 169 preconv_cue(const struct buf *b) |
|
sz -= phsz; |
sz -= phsz; |
ln += phsz; |
ln += phsz; |
continue; |
continue; |
} |
} |
|
|
sz -= 7; |
sz -= 7; |
ln += 7; |
ln += 7; |