=================================================================== RCS file: /cvs/mandoc/roff.c,v retrieving revision 1.153 retrieving revision 1.155 diff -u -p -r1.153 -r1.155 --- mandoc/roff.c 2011/07/26 14:24:06 1.153 +++ mandoc/roff.c 2011/07/27 07:32:26 1.155 @@ -1,4 +1,4 @@ -/* $Id: roff.c,v 1.153 2011/07/26 14:24:06 kristaps Exp $ */ +/* $Id: roff.c,v 1.155 2011/07/27 07:32:26 kristaps Exp $ */ /* * Copyright (c) 2010, 2011 Kristaps Dzonsons * Copyright (c) 2010, 2011 Ingo Schwarze @@ -143,6 +143,12 @@ struct predef { #define PREDEF(__name, __str) \ { (__name), (__str) }, +static enum rofft roffhash_find(const char *, size_t); +static void roffhash_init(void); +static void roffnode_cleanscope(struct roff *); +static void roffnode_pop(struct roff *); +static void roffnode_push(struct roff *, enum rofft, + const char *, int, int); static enum rofferr roff_block(ROFF_ARGS); static enum rofferr roff_block_text(ROFF_ARGS); static enum rofferr roff_block_sub(ROFF_ARGS); @@ -153,13 +159,16 @@ static enum rofferr roff_cond_text(ROFF_ARGS); static enum rofferr roff_cond_sub(ROFF_ARGS); static enum rofferr roff_ds(ROFF_ARGS); static enum roffrule roff_evalcond(const char *, int *); +static void roff_free1(struct roff *); static void roff_freestr(struct roff *); static char *roff_getname(struct roff *, char **, int, int); static const char *roff_getstrn(const struct roff *, const char *, size_t); static enum rofferr roff_line_ignore(ROFF_ARGS); static enum rofferr roff_nr(ROFF_ARGS); -static int roff_res(struct roff *, +static enum rofft roff_parse(struct roff *, const char *, int *); +static enum rofferr roff_parsetext(char *); +static void roff_res(struct roff *, char **, size_t *, int, int); static enum rofferr roff_rm(ROFF_ARGS); static void roff_setstr(struct roff *, @@ -172,7 +181,7 @@ static enum rofferr roff_EN(ROFF_ARGS); static enum rofferr roff_T_(ROFF_ARGS); static enum rofferr roff_userdef(ROFF_ARGS); -/* See roff_hash_find() */ +/* See roffhash_find() */ #define ASCII_HI 126 #define ASCII_LO 33 @@ -220,20 +229,11 @@ static const struct predef predefs[PREDEFS_MAX] = { #include "predefs.in" }; -static void roff_free1(struct roff *); -static enum rofft roff_hash_find(const char *, size_t); -static void roff_hash_init(void); -static void roffnode_cleanscope(struct roff *); -static void roffnode_push(struct roff *, enum rofft, - const char *, int, int); -static void roffnode_pop(struct roff *); -static enum rofft roff_parse(struct roff *, const char *, int *); - -/* See roff_hash_find() */ +/* See roffhash_find() */ #define ROFF_HASH(p) (p[0] - ASCII_LO) static void -roff_hash_init(void) +roffhash_init(void) { struct roffmac *n; int buc, i; @@ -258,7 +258,7 @@ roff_hash_init(void) * the nil-terminated string name could be found. */ static enum rofft -roff_hash_find(const char *p, size_t s) +roffhash_find(const char *p, size_t s) { int buc; struct roffmac *n; @@ -387,7 +387,7 @@ roff_alloc(struct mparse *parse) r->parse = parse; r->rstackpos = -1; - roff_hash_init(); + roffhash_init(); for (i = 0; i < PREDEFS_MAX; i++) roff_setstr(r, predefs[i].name, predefs[i].str, 0); @@ -400,8 +400,8 @@ roff_alloc(struct mparse *parse) * `\*', e.g., `\*(ab'). These must be handled before the actual line * is processed. * This also checks the syntax of regular escapes. -*/ -static int + */ +static void roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos) { enum mandoc_esc esc; @@ -413,8 +413,7 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int size_t nsz; char *n; - /* Search for a leading backslash and save a pointer to it. */ - +again: cp = *bufp + pos; while (NULL != (cp = strchr(cp, '\\'))) { stesc = cp++; @@ -426,7 +425,7 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int */ if ('\0' == *cp) - return(1); + return; if ('*' != *cp) { res = cp; @@ -437,7 +436,7 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int mandoc_msg (MANDOCERR_BADESCAPE, r->parse, ln, (int)(stesc - *bufp), NULL); - continue; + return; } cp++; @@ -450,7 +449,7 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int switch (*cp) { case ('\0'): - return(1); + return; case ('('): cp++; maxl = 2; @@ -473,7 +472,7 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int (MANDOCERR_BADESCAPE, r->parse, ln, (int)(stesc - *bufp), NULL); - return(1); + return; } if (0 == maxl && ']' == *cp) break; @@ -495,6 +494,8 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int /* Replace the escape sequence by the string. */ + pos += (stesc - *bufp); + nsz = *szp + strlen(res) + 1; n = mandoc_malloc(nsz); @@ -506,10 +507,52 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int *bufp = n; *szp = nsz; - return(0); + goto again; } +} - return(1); +/* + * Process text streams: convert all breakable hyphens into ASCII_HYPH. + */ +static enum rofferr +roff_parsetext(char *p) +{ + char l, r; + size_t sz; + const char *start; + enum mandoc_esc esc; + + start = p; + + while ('\0' != *p) { + sz = strcspn(p, "-\\"); + p += sz; + + if ('\\' == *p) { + /* Skip over escapes. */ + p++; + esc = mandoc_escape + ((const char **)&p, NULL, NULL); + if (ESCAPE_ERROR == esc) + break; + continue; + } else if ('-' != *p || p == start) + continue; + + l = *(p - 1); + r = *(p + 1); + + if ('\\' != l && + '\t' != r && '\t' != l && + ' ' != r && ' ' != l && + '-' != r && '-' != l && + ! isdigit((unsigned char)l) && + ! isdigit((unsigned char)r)) + *p = ASCII_HYPH; + p++; + } + + return(ROFF_CONT); } enum rofferr @@ -525,8 +568,7 @@ roff_parseln(struct roff *r, int ln, char **bufp, * words to fill in. */ - if ( ! roff_res(r, bufp, szp, ln, pos)) - return(ROFF_REPARSE); + roff_res(r, bufp, szp, ln, pos); ppos = pos; ctl = mandoc_getcontrol(*bufp, &pos); @@ -551,13 +593,13 @@ roff_parseln(struct roff *r, int ln, char **bufp, return(eqn_read(&r->eqn, ln, *bufp, pos, offs)); if (r->tbl) return(tbl_read(r->tbl, ln, *bufp, pos)); - return(ROFF_CONT); + return(roff_parsetext(*bufp + pos)); } else if ( ! ctl) { if (r->eqn) return(eqn_read(&r->eqn, ln, *bufp, pos, offs)); if (r->tbl) return(tbl_read(r->tbl, ln, *bufp, pos)); - return(ROFF_CONT); + return(roff_parsetext(*bufp + pos)); } else if (r->eqn) return(eqn_read(&r->eqn, ln, *bufp, ppos, offs)); @@ -637,7 +679,7 @@ roff_parse(struct roff *r, const char *buf, int *pos) maclen = strcspn(mac + 1, " \\\t\0") + 1; t = (r->current_string = roff_getstrn(r, mac, maclen)) - ? ROFF_USERDEF : roff_hash_find(mac, maclen); + ? ROFF_USERDEF : roffhash_find(mac, maclen); *pos += (int)maclen;