=================================================================== RCS file: /cvs/mandoc/roff.c,v retrieving revision 1.166 retrieving revision 1.174 diff -u -p -r1.166 -r1.174 --- mandoc/roff.c 2011/07/29 09:19:48 1.166 +++ mandoc/roff.c 2012/06/12 20:21:04 1.174 @@ -1,7 +1,7 @@ -/* $Id: roff.c,v 1.166 2011/07/29 09:19:48 kristaps Exp $ */ +/* $Id: roff.c,v 1.174 2012/06/12 20:21:04 kristaps Exp $ */ /* * Copyright (c) 2010, 2011 Kristaps Dzonsons - * Copyright (c) 2010, 2011 Ingo Schwarze + * Copyright (c) 2010, 2011, 2012 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -31,11 +31,15 @@ /* Maximum number of nested if-else conditionals. */ #define RSTACK_MAX 128 +/* Maximum number of string expansions per line, to break infinite loops. */ +#define EXPAND_LIMIT 1000 + enum rofft { ROFF_ad, ROFF_am, ROFF_ami, ROFF_am1, + ROFF_cc, ROFF_de, ROFF_dei, ROFF_de1, @@ -81,13 +85,16 @@ struct reg { unsigned int u; /* unsigned integer */ }; +/* + * An incredibly-simple string buffer. + */ struct roffstr { - char *p; - size_t sz; + char *p; /* nil-terminated buffer */ + size_t sz; /* saved strlen(p) */ }; /* - * A key-value string pair with lengths. + * A key-value roffstr pair as part of a singly-linked list. 
*/ struct roffkv { struct roffstr key; @@ -99,10 +106,12 @@ struct roff { struct mparse *parse; /* parse point */ struct roffnode *last; /* leaf of stack */ enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */ + char control; /* control character */ int rstackpos; /* position in rstack */ struct reg regs[REG__MAX]; struct roffkv *strtab; /* user-defined strings & macros */ - struct roffkv *chrtab; /* user-defined characters */ + struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ + struct roffstr *xtab; /* single-byte trans table (`tr') */ const char *current_string; /* value of last called user macro */ struct tbl_node *first_tbl; /* first table parsed */ struct tbl_node *last_tbl; /* last table parsed */ @@ -162,6 +171,7 @@ static enum rofferr roff_block(ROFF_ARGS); static enum rofferr roff_block_text(ROFF_ARGS); static enum rofferr roff_block_sub(ROFF_ARGS); static enum rofferr roff_cblock(ROFF_ARGS); +static enum rofferr roff_cc(ROFF_ARGS); static enum rofferr roff_ccond(ROFF_ARGS); static enum rofferr roff_cond(ROFF_ARGS); static enum rofferr roff_cond_text(ROFF_ARGS); @@ -169,17 +179,17 @@ static enum rofferr roff_cond_sub(ROFF_ARGS); static enum rofferr roff_ds(ROFF_ARGS); static enum roffrule roff_evalcond(const char *, int *); static void roff_free1(struct roff *); -static void roff_freestr(struct roffkv **); +static void roff_freestr(struct roffkv *); static char *roff_getname(struct roff *, char **, int, int); static const char *roff_getstrn(const struct roff *, const char *, size_t); static enum rofferr roff_line_ignore(ROFF_ARGS); static enum rofferr roff_nr(ROFF_ARGS); -static void roff_openeqn(struct roff *, const char *, +static void roff_openeqn(struct roff *, const char *, int, int, const char *); static enum rofft roff_parse(struct roff *, const char *, int *); static enum rofferr roff_parsetext(char *); -static void roff_res(struct roff *, +static enum rofferr roff_res(struct roff *, char **, size_t *, int, int); static enum 
rofferr roff_rm(ROFF_ARGS); static void roff_setstr(struct roff *, @@ -208,6 +218,7 @@ static struct roffmac roffs[ROFF_MAX] = { { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL }, { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL }, { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, + { "cc", roff_cc, NULL, NULL, 0, NULL }, { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL }, { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL }, { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, @@ -346,6 +357,7 @@ roff_free1(struct roff *r) { struct tbl_node *t; struct eqn_node *e; + int i; while (NULL != (t = r->first_tbl)) { r->first_tbl = t->next; @@ -364,11 +376,19 @@ roff_free1(struct roff *r) while (r->last) roffnode_pop(r); - roff_freestr(&r->strtab); - roff_freestr(&r->chrtab); -} + roff_freestr(r->strtab); + roff_freestr(r->xmbtab); + r->strtab = r->xmbtab = NULL; + if (r->xtab) + for (i = 0; i < 128; i++) + free(r->xtab[i].p); + + free(r->xtab); + r->xtab = NULL; +} + void roff_reset(struct roff *r) { @@ -376,6 +396,7 @@ roff_reset(struct roff *r) roff_free1(r); + r->control = 0; memset(&r->regs, 0, sizeof(struct reg) * REG__MAX); for (i = 0; i < PREDEFS_MAX; i++) @@ -416,7 +437,7 @@ roff_alloc(struct mparse *parse) * is processed. * This also checks the syntax of regular escapes. */ -static void +static enum rofferr roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos) { enum mandoc_esc esc; @@ -424,10 +445,12 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int const char *stnam; /* start of the name, after "[(*" */ const char *cp; /* end of the name, e.g. 
before ']' */ const char *res; /* the string to be substituted */ - int i, maxl; + int i, maxl, expand_count; size_t nsz; char *n; + expand_count = 0; + again: cp = *bufp + pos; while (NULL != (cp = strchr(cp, '\\'))) { @@ -440,7 +463,7 @@ again: */ if ('\0' == *cp) - return; + return(ROFF_CONT); if ('*' != *cp) { res = cp; @@ -451,7 +474,7 @@ again: mandoc_msg (MANDOCERR_BADESCAPE, r->parse, ln, (int)(stesc - *bufp), NULL); - return; + return(ROFF_CONT); } cp++; @@ -464,7 +487,7 @@ again: switch (*cp) { case ('\0'): - return; + return(ROFF_CONT); case ('('): cp++; maxl = 2; @@ -487,7 +510,7 @@ again: (MANDOCERR_BADESCAPE, r->parse, ln, (int)(stesc - *bufp), NULL); - return; + return(ROFF_CONT); } if (0 == maxl && ']' == *cp) break; @@ -522,8 +545,15 @@ again: *bufp = n; *szp = nsz; - goto again; + + if (EXPAND_LIMIT >= ++expand_count) + goto again; + + /* Just leave the string unexpanded. */ + mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL); + return(ROFF_IGN); } + return(ROFF_CONT); } /* @@ -532,7 +562,6 @@ again: static enum rofferr roff_parsetext(char *p) { - char l, r; size_t sz; const char *start; enum mandoc_esc esc; @@ -559,14 +588,8 @@ roff_parsetext(char *p) continue; } - l = *(p - 1); - r = *(p + 1); - if ('\\' != l && - '\t' != r && '\t' != l && - ' ' != r && ' ' != l && - '-' != r && '-' != l && - ! isdigit((unsigned char)l) && - ! isdigit((unsigned char)r)) + if (isalpha((unsigned char)p[-1]) && + isalpha((unsigned char)p[1])) *p = ASCII_HYPH; p++; } @@ -587,10 +610,13 @@ roff_parseln(struct roff *r, int ln, char **bufp, * words to fill in. 
*/ - roff_res(r, bufp, szp, ln, pos); + e = roff_res(r, bufp, szp, ln, pos); + if (ROFF_IGN == e) + return(e); + assert(ROFF_CONT == e); ppos = pos; - ctl = mandoc_getcontrol(*bufp, &pos); + ctl = roff_getcontrol(r, *bufp, &pos); /* * First, if a scope is open and we're not a macro, pass the @@ -757,7 +783,7 @@ roffnode_cleanscope(struct roff *r) { while (r->last) { - if (--r->last->endspan < 0) + if (--r->last->endspan != 0) break; roffnode_pop(r); } @@ -1077,9 +1103,9 @@ roff_line_ignore(ROFF_ARGS) static enum rofferr roff_cond(ROFF_ARGS) { - int sv; - enum roffrule rule; + roffnode_push(r, tok, NULL, ln, ppos); + /* * An `.el' has no conditional body: it will consume the value * of the current rstack entry set in prior `ie' calls or @@ -1088,32 +1114,12 @@ roff_cond(ROFF_ARGS) * If we're not an `el', however, then evaluate the conditional. */ - rule = ROFF_el == tok ? + r->last->rule = ROFF_el == tok ? (r->rstackpos < 0 ? ROFFRULE_DENY : r->rstack[r->rstackpos--]) : roff_evalcond(*bufp, &pos); - sv = pos; - while (' ' == (*bufp)[pos]) - pos++; - /* - * Roff is weird. If we have just white-space after the - * conditional, it's considered the BODY and we exit without - * really doing anything. Warn about this. It's probably - * wrong. - */ - - if ('\0' == (*bufp)[pos] && sv != pos) { - mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL); - return(ROFF_IGN); - } - - roffnode_push(r, tok, NULL, ln, ppos); - - r->last->rule = rule; - - /* * An if-else will put the NEGATION of the current evaluated * conditional into the stack of rules. */ @@ -1135,28 +1141,39 @@ roff_cond(ROFF_ARGS) r->last->rule = ROFFRULE_DENY; /* - * Determine scope. If we're invoked with "\{" trailing the - * conditional, then we're in a multiline scope. Else our scope - * expires on the next line. + * Determine scope. + * If there is nothing on the line after the conditional, + * not even whitespace, use next-line scope. 
*/ - r->last->endspan = 1; + if ('\0' == (*bufp)[pos]) { + r->last->endspan = 2; + goto out; + } + while (' ' == (*bufp)[pos]) + pos++; + + /* An opening brace requests multiline scope. */ + if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) { r->last->endspan = -1; pos += 2; + goto out; } /* - * If there are no arguments on the line, the next-line scope is - * assumed. + * Anything else following the conditional causes + * single-line scope. Warn if the scope contains + * nothing but trailing whitespace. */ if ('\0' == (*bufp)[pos]) - return(ROFF_IGN); + mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL); - /* Otherwise re-run the roff parser after recalculating. */ + r->last->endspan = 1; +out: *offs = pos; return(ROFF_RERUN); } @@ -1351,6 +1368,23 @@ roff_TS(ROFF_ARGS) /* ARGSUSED */ static enum rofferr +roff_cc(ROFF_ARGS) +{ + const char *p; + + p = *bufp + pos; + + if ('\0' == *p || '.' == (r->control = *p++)) + r->control = 0; + + if ('\0' != *p) + mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL); + + return(ROFF_IGN); +} + +/* ARGSUSED */ +static enum rofferr roff_tr(ROFF_ARGS) { const char *p, *first, *second; @@ -1396,7 +1430,19 @@ roff_tr(ROFF_ARGS) p--; } - roff_setstrn(&r->chrtab, first, fsz, second, ssz, 0); + if (fsz > 1) { + roff_setstrn(&r->xmbtab, first, + fsz, second, ssz, 0); + continue; + } + + if (NULL == r->xtab) + r->xtab = mandoc_calloc + (128, sizeof(struct roffstr)); + + free(r->xtab[(int)*first].p); + r->xtab[(int)*first].p = mandoc_strndup(second, ssz); + r->xtab[(int)*first].sz = ssz; } return(ROFF_IGN); @@ -1616,18 +1662,16 @@ roff_getstrn(const struct roff *r, const char *name, s } static void -roff_freestr(struct roffkv **r) +roff_freestr(struct roffkv *r) { struct roffkv *n, *nn; - for (n = *r; n; n = nn) { + for (n = r; n; n = nn) { free(n->key.p); free(n->val.p); nn = n->next; free(n); } - - *r = NULL; } const struct tbl_span * @@ -1644,13 +1688,6 @@ roff_eqn(const struct roff *r) return(r->last_eqn ? 
&r->last_eqn->eqn : NULL); } -char -roff_eqndelim(const struct roff *r) -{ - - return('\0'); -} - /* * Duplicate an input string, making the appropriate character * conversations (as stipulated by `tr') along the way. @@ -1665,7 +1702,7 @@ roff_strdup(const struct roff *r, const char *p) size_t ssz, sz; enum mandoc_esc esc; - if (NULL == r->chrtab) + if (NULL == r->xmbtab && NULL == r->xtab) return(mandoc_strdup(p)); else if ('\0' == *p) return(mandoc_strdup("")); @@ -1682,8 +1719,21 @@ roff_strdup(const struct roff *r, const char *p) ssz = 0; while ('\0' != *p) { + if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) { + sz = r->xtab[(int)*p].sz; + res = mandoc_realloc(res, ssz + sz + 1); + memcpy(res + ssz, r->xtab[(int)*p].p, sz); + ssz += sz; + p++; + continue; + } else if ('\\' != *p) { + res = mandoc_realloc(res, ssz + 2); + res[ssz++] = *p++; + continue; + } + /* Search for term matches. */ - for (cp = r->chrtab; cp; cp = cp->next) + for (cp = r->xmbtab; cp; cp = cp->next) if (0 == strncmp(p, cp->key.p, cp->key.sz)) break; @@ -1701,38 +1751,66 @@ roff_strdup(const struct roff *r, const char *p) continue; } - if ('\\' == *p) { - /* - * Handle escapes carefully: we need to copy - * over just the escape itself, or else we might - * do replacements within the escape itself. - * Make sure to pass along the bogus string. - */ - pp = p++; - esc = mandoc_escape(&p, NULL, NULL); - if (ESCAPE_ERROR == esc) { - sz = strlen(pp); - res = mandoc_realloc(res, ssz + sz + 1); - memcpy(res + ssz, pp, sz); - break; - } - /* - * We bail out on bad escapes. - * No need to warn: we already did so when - * roff_res() was called. - */ - sz = (int)(p - pp); + /* + * Handle escapes carefully: we need to copy + * over just the escape itself, or else we might + * do replacements within the escape itself. + * Make sure to pass along the bogus string. 
+ */ pp = p++; + esc = mandoc_escape(&p, NULL, NULL); + if (ESCAPE_ERROR == esc) { + sz = strlen(pp); res = mandoc_realloc(res, ssz + sz + 1); memcpy(res + ssz, pp, sz); - ssz += sz; - continue; + break; } - - /* Just append the charater. */ - res = mandoc_realloc(res, ssz + 2); - res[ssz++] = *p++; + /* + * We bail out on bad escapes. + * No need to warn: we already did so when + * roff_res() was called. + */ + sz = (int)(p - pp); + res = mandoc_realloc(res, ssz + sz + 1); + memcpy(res + ssz, pp, sz); + ssz += sz; } res[(int)ssz] = '\0'; return(res); +} + +/* + * Find out whether a line is a macro line or not. + * If it is, adjust the current position and return one; if it isn't, + * return zero and don't change the current position. + * If the control character has been set with `.cc', then let that take + * precedence. + * This is slightly contrary to groff, where using the non-breaking + * control character when `cc' has been invoked will cause the + * non-breaking macro contents to be printed verbatim. + */ +int +roff_getcontrol(const struct roff *r, const char *cp, int *ppos) +{ + int pos; + + pos = *ppos; + + if (0 != r->control && cp[pos] == r->control) + pos++; + else if (0 != r->control) + return(0); + else if ('\\' == cp[pos] && '.' == cp[pos + 1]) + pos += 2; + else if ('.' == cp[pos] || '\'' == cp[pos]) + pos++; + else + return(0); + + while (' ' == cp[pos] || '\t' == cp[pos]) + pos++; + + *ppos = pos; + return(1); }