=================================================================== RCS file: /cvs/mandoc/roff.c,v retrieving revision 1.152 retrieving revision 1.154 diff -u -p -r1.152 -r1.154 --- mandoc/roff.c 2011/07/26 14:09:01 1.152 +++ mandoc/roff.c 2011/07/27 07:09:41 1.154 @@ -1,4 +1,4 @@ -/* $Id: roff.c,v 1.152 2011/07/26 14:09:01 kristaps Exp $ */ +/* $Id: roff.c,v 1.154 2011/07/27 07:09:41 kristaps Exp $ */ /* * Copyright (c) 2010, 2011 Kristaps Dzonsons * Copyright (c) 2010, 2011 Ingo Schwarze @@ -159,7 +159,7 @@ static const char *roff_getstrn(const struct roff *, const char *, size_t); static enum rofferr roff_line_ignore(ROFF_ARGS); static enum rofferr roff_nr(ROFF_ARGS); -static int roff_res(struct roff *, +static void roff_res(struct roff *, char **, size_t *, int, int); static enum rofferr roff_rm(ROFF_ARGS); static void roff_setstr(struct roff *, @@ -395,13 +395,13 @@ roff_alloc(struct mparse *parse) return(r); } - /* * Pre-filter each and every line for reserved words (one beginning with * `\*', e.g., `\*(ab'). These must be handled before the actual line * is processed. + * This also checks the syntax of regular escapes. */ -static int +static void roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos) { enum mandoc_esc esc; @@ -413,8 +413,7 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int size_t nsz; char *n; - /* Search for a leading backslash and save a pointer to it. */ - +again: cp = *bufp + pos; while (NULL != (cp = strchr(cp, '\\'))) { stesc = cp++; @@ -426,17 +425,18 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int */ if ('\0' == *cp) - return(1); + return; if ('*' != *cp) { res = cp; esc = mandoc_escape(&cp, NULL, NULL); if (ESCAPE_ERROR != esc) continue; - mandoc_msg(MANDOCERR_BADESCAPE, - r->parse, ln, pos, NULL); cp = res; - continue; + mandoc_msg + (MANDOCERR_BADESCAPE, r->parse, + ln, (int)(stesc - *bufp), NULL); + return; } cp++; @@ -449,7 +449,7 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int switch (*cp) { case ('\0'): - return(1); + return; case ('('): cp++; maxl = 2; @@ -467,8 +467,13 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int /* Advance to the end of the name. */ for (i = 0; 0 == maxl || i < maxl; i++, cp++) { - if ('\0' == *cp) - return(1); /* Error. */ + if ('\0' == *cp) { + mandoc_msg + (MANDOCERR_BADESCAPE, + r->parse, ln, + (int)(stesc - *bufp), NULL); + return; + } if (0 == maxl && ']' == *cp) break; } @@ -481,13 +486,16 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int res = roff_getstrn(r, stnam, (size_t)i); if (NULL == res) { - /* TODO: keep track of the correct position. */ - mandoc_msg(MANDOCERR_BADESCAPE, r->parse, ln, pos, NULL); + mandoc_msg + (MANDOCERR_BADESCAPE, r->parse, + ln, (int)(stesc - *bufp), NULL); res = ""; } /* Replace the escape sequence by the string. */ + pos += (stesc - *bufp); + nsz = *szp + strlen(res) + 1; n = mandoc_malloc(nsz); @@ -499,13 +507,43 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int *bufp = n; *szp = nsz; - return(0); + goto again; } - - return(1); } +/* + * Process text streams: convert all breakable hyphens into ASCII_HYPH. + */ +static enum rofferr +roff_parsetext(char *p) +{ + size_t sz; + const char *start; + enum mandoc_esc esc; + start = p; + + while ('\0' != *p) { + sz = strcspn(p, "-\\"); + p += sz; + + if ('\\' == *p) { + /* Skip over escapes. */ + p++; + esc = mandoc_escape + ((const char **)&p, NULL, NULL); + if (ESCAPE_ERROR == esc) + break; + } else if ('-' == *p) { + if (mandoc_hyph(start, p)) + *p = ASCII_HYPH; + p++; + } + } + + return(ROFF_CONT); +} + enum rofferr roff_parseln(struct roff *r, int ln, char **bufp, size_t *szp, int pos, int *offs) @@ -519,8 +557,7 @@ roff_parseln(struct roff *r, int ln, char **bufp, * words to fill in. */ - if (r->first_string && ! roff_res(r, bufp, szp, ln, pos)) - return(ROFF_REPARSE); + roff_res(r, bufp, szp, ln, pos); ppos = pos; ctl = mandoc_getcontrol(*bufp, &pos); @@ -545,13 +582,13 @@ roff_parseln(struct roff *r, int ln, char **bufp, return(eqn_read(&r->eqn, ln, *bufp, pos, offs)); if (r->tbl) return(tbl_read(r->tbl, ln, *bufp, pos)); - return(ROFF_CONT); + return(roff_parsetext(*bufp + pos)); } else if ( ! ctl) { if (r->eqn) return(eqn_read(&r->eqn, ln, *bufp, pos, offs)); if (r->tbl) return(tbl_read(r->tbl, ln, *bufp, pos)); - return(ROFF_CONT); + return(roff_parsetext(*bufp + pos)); } else if (r->eqn) return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));