=================================================================== RCS file: /cvs/mandoc/roff.c,v retrieving revision 1.164 retrieving revision 1.173 diff -u -p -r1.164 -r1.173 --- mandoc/roff.c 2011/07/28 14:17:11 1.164 +++ mandoc/roff.c 2012/05/31 22:41:19 1.173 @@ -1,7 +1,7 @@ -/* $Id: roff.c,v 1.164 2011/07/28 14:17:11 kristaps Exp $ */ +/* $Id: roff.c,v 1.173 2012/05/31 22:41:19 schwarze Exp $ */ /* * Copyright (c) 2010, 2011 Kristaps Dzonsons - * Copyright (c) 2010, 2011 Ingo Schwarze + * Copyright (c) 2010, 2011, 2012 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -31,6 +31,9 @@ /* Maximum number of nested if-else conditionals. */ #define RSTACK_MAX 128 +/* Maximum number of string expansions per line, to break infinite loops. */ +#define EXPAND_LIMIT 1000 + enum rofft { ROFF_ad, ROFF_am, @@ -77,26 +80,36 @@ enum roffrule { * Registers are assumed to be unsigned ints for now. */ struct reg { - int set; /* whether set or not */ - unsigned int u; /* unsigned integer */ + int set; /* whether set or not */ + unsigned int u; /* unsigned integer */ }; +/* + * An incredibly-simple string buffer. + */ struct roffstr { - char *key; /* key of symbol */ - size_t keysz; - char *val; /* current value */ - size_t valsz; - struct roffstr *next; /* next in list */ + char *p; /* nil-terminated buffer */ + size_t sz; /* saved strlen(p) */ }; +/* + * A key-value roffstr pair as part of a singly-linked list. + */ +struct roffkv { + struct roffstr key; + struct roffstr val; + struct roffkv *next; /* next in list */ +}; + struct roff { struct mparse *parse; /* parse point */ struct roffnode *last; /* leaf of stack */ enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */ int rstackpos; /* position in rstack */ struct reg regs[REG__MAX]; - struct roffstr *strtab; /* user-defined strings & macros */ - struct roffstr *chrtab; /* user-defined characters */ + struct roffkv *strtab; /* user-defined strings & macros */ + struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ + struct roffstr *xtab; /* single-byte trans table (`tr') */ const char *current_string; /* value of last called user macro */ struct tbl_node *first_tbl; /* first table parsed */ struct tbl_node *last_tbl; /* last table parsed */ @@ -163,22 +176,22 @@ static enum rofferr roff_cond_sub(ROFF_ARGS); static enum rofferr roff_ds(ROFF_ARGS); static enum roffrule roff_evalcond(const char *, int *); static void roff_free1(struct roff *); -static void roff_freestr(struct roffstr **); +static void roff_freestr(struct roffkv *); static char *roff_getname(struct roff *, char **, int, int); static const char *roff_getstrn(const struct roff *, const char *, size_t); static enum rofferr roff_line_ignore(ROFF_ARGS); static enum rofferr roff_nr(ROFF_ARGS); -static void roff_openeqn(struct roff *, const char *, +static void roff_openeqn(struct roff *, const char *, int, int, const char *); static enum rofft roff_parse(struct roff *, const char *, int *); static enum rofferr roff_parsetext(char *); -static void roff_res(struct roff *, +static enum rofferr roff_res(struct roff *, char **, size_t *, int, int); static enum rofferr roff_rm(ROFF_ARGS); static void roff_setstr(struct roff *, const char *, const char *, int); -static void roff_setstrn(struct roffstr **, const char *, +static void roff_setstrn(struct roffkv **, const char *, size_t, const char *, size_t, int); static enum rofferr roff_so(ROFF_ARGS); static enum rofferr roff_tr(ROFF_ARGS); @@ -340,6 +353,7 @@ roff_free1(struct roff *r) { struct tbl_node *t; struct eqn_node *e; + int i; while (NULL != (t = r->first_tbl)) { r->first_tbl = t->next; @@ -358,11 +372,19 @@ roff_free1(struct roff *r) while (r->last) roffnode_pop(r); - roff_freestr(&r->strtab); - roff_freestr(&r->chrtab); -} + roff_freestr(r->strtab); + roff_freestr(r->xmbtab); + r->strtab = r->xmbtab = NULL; + if (r->xtab) + for (i = 0; i < 128; i++) + free(r->xtab[i].p); + + free(r->xtab); + r->xtab = NULL; +} + void roff_reset(struct roff *r) { @@ -410,7 +432,7 @@ roff_alloc(struct mparse *parse) * is processed. * This also checks the syntax of regular escapes. */ -static void +static enum rofferr roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos) { enum mandoc_esc esc; @@ -418,10 +440,12 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int const char *stnam; /* start of the name, after "[(*" */ const char *cp; /* end of the name, e.g. before ']' */ const char *res; /* the string to be substituted */ - int i, maxl; + int i, maxl, expand_count; size_t nsz; char *n; + expand_count = 0; + again: cp = *bufp + pos; while (NULL != (cp = strchr(cp, '\\'))) { @@ -434,7 +458,7 @@ again: */ if ('\0' == *cp) - return; + return(ROFF_CONT); if ('*' != *cp) { res = cp; @@ -445,7 +469,7 @@ again: mandoc_msg (MANDOCERR_BADESCAPE, r->parse, ln, (int)(stesc - *bufp), NULL); - return; + return(ROFF_CONT); } cp++; @@ -458,7 +482,7 @@ again: switch (*cp) { case ('\0'): - return; + return(ROFF_CONT); case ('('): cp++; maxl = 2; @@ -481,7 +505,7 @@ again: (MANDOCERR_BADESCAPE, r->parse, ln, (int)(stesc - *bufp), NULL); - return; + return(ROFF_CONT); } if (0 == maxl && ']' == *cp) break; @@ -516,8 +540,15 @@ again: *bufp = n; *szp = nsz; - goto again; + + if (EXPAND_LIMIT >= ++expand_count) + goto again; + + /* Just leave the string unexpanded. */ + mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL); + return(ROFF_IGN); } + return(ROFF_CONT); } /* @@ -526,7 +557,6 @@ again: static enum rofferr roff_parsetext(char *p) { - char l, r; size_t sz; const char *start; enum mandoc_esc esc; @@ -553,14 +583,8 @@ roff_parsetext(char *p) continue; } - l = *(p - 1); - r = *(p + 1); - if ('\\' != l && - '\t' != r && '\t' != l && - ' ' != r && ' ' != l && - '-' != r && '-' != l && - ! isdigit((unsigned char)l) && - ! isdigit((unsigned char)r)) + if (isalpha((unsigned char)p[-1]) && + isalpha((unsigned char)p[1])) *p = ASCII_HYPH; p++; } @@ -581,7 +605,10 @@ roff_parseln(struct roff *r, int ln, char **bufp, * words to fill in. */ - roff_res(r, bufp, szp, ln, pos); + e = roff_res(r, bufp, szp, ln, pos); + if (ROFF_IGN == e) + return(e); + assert(ROFF_CONT == e); ppos = pos; ctl = mandoc_getcontrol(*bufp, &pos); @@ -751,7 +778,7 @@ roffnode_cleanscope(struct roff *r) { while (r->last) { - if (--r->last->endspan < 0) + if (--r->last->endspan != 0) break; roffnode_pop(r); } @@ -1071,9 +1098,9 @@ roff_line_ignore(ROFF_ARGS) static enum rofferr roff_cond(ROFF_ARGS) { - int sv; - enum roffrule rule; + roffnode_push(r, tok, NULL, ln, ppos); + /* * An `.el' has no conditional body: it will consume the value * of the current rstack entry set in prior `ie' calls or @@ -1082,32 +1109,12 @@ roff_cond(ROFF_ARGS) * If we're not an `el', however, then evaluate the conditional. */ - rule = ROFF_el == tok ? + r->last->rule = ROFF_el == tok ? (r->rstackpos < 0 ? ROFFRULE_DENY : r->rstack[r->rstackpos--]) : roff_evalcond(*bufp, &pos); - sv = pos; - while (' ' == (*bufp)[pos]) - pos++; - /* - * Roff is weird. If we have just white-space after the - * conditional, it's considered the BODY and we exit without - * really doing anything. Warn about this. It's probably - * wrong. - */ - - if ('\0' == (*bufp)[pos] && sv != pos) { - mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL); - return(ROFF_IGN); - } - - roffnode_push(r, tok, NULL, ln, ppos); - - r->last->rule = rule; - - /* * An if-else will put the NEGATION of the current evaluated * conditional into the stack of rules. */ @@ -1129,28 +1136,39 @@ roff_cond(ROFF_ARGS) r->last->rule = ROFFRULE_DENY; /* - * Determine scope. If we're invoked with "\{" trailing the - * conditional, then we're in a multiline scope. Else our scope - * expires on the next line. + * Determine scope. + * If there is nothing on the line after the conditional, + * not even whitespace, use next-line scope. */ - r->last->endspan = 1; + if ('\0' == (*bufp)[pos]) { + r->last->endspan = 2; + goto out; + } + while (' ' == (*bufp)[pos]) + pos++; + + /* An opening brace requests multiline scope. */ + if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) { r->last->endspan = -1; pos += 2; + goto out; } /* - * If there are no arguments on the line, the next-line scope is - * assumed. + * Anything else following the conditional causes + * single-line scope. Warn if the scope contains + * nothing but trailing whitespace. */ if ('\0' == (*bufp)[pos]) - return(ROFF_IGN); + mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL); - /* Otherwise re-run the roff parser after recalculating. */ + r->last->endspan = 1; +out: *offs = pos; return(ROFF_RERUN); } @@ -1383,13 +1401,26 @@ roff_tr(ROFF_ARGS) return(ROFF_IGN); } ssz = (size_t)(p - second); - } else if ('\0' == *p) { + } else if ('\0' == *second) { mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, (int)(p - *bufp), NULL); second = " "; + p--; } - roff_setstrn(&r->chrtab, first, fsz, second, ssz, 0); + if (fsz > 1) { + roff_setstrn(&r->xmbtab, first, + fsz, second, ssz, 0); + continue; + } + + if (NULL == r->xtab) + r->xtab = mandoc_calloc + (128, sizeof(struct roffstr)); + + free(r->xtab[(int)*first].p); + r->xtab[(int)*first].p = mandoc_strndup(second, ssz); + r->xtab[(int)*first].sz = ssz; } return(ROFF_IGN); @@ -1524,10 +1555,10 @@ roff_setstr(struct roff *r, const char *name, const ch } static void -roff_setstrn(struct roffstr **r, const char *name, size_t namesz, +roff_setstrn(struct roffkv **r, const char *name, size_t namesz, const char *string, size_t stringsz, int multiline) { - struct roffstr *n; + struct roffkv *n; char *c; int i; size_t oldch, newch; @@ -1535,23 +1566,23 @@ roff_setstrn(struct roffstr **r, const char *name, siz /* Search for an existing string with the same name. */ n = *r; - while (n && strcmp(name, n->key)) + while (n && strcmp(name, n->key.p)) n = n->next; if (NULL == n) { /* Create a new string table entry. */ - n = mandoc_malloc(sizeof(struct roffstr)); - n->key = mandoc_strndup(name, namesz); - n->keysz = namesz; - n->val = NULL; - n->valsz = 0; + n = mandoc_malloc(sizeof(struct roffkv)); + n->key.p = mandoc_strndup(name, namesz); + n->key.sz = namesz; + n->val.p = NULL; + n->val.sz = 0; n->next = *r; *r = n; } else if (0 == multiline) { /* In multiline mode, append; else replace. */ - free(n->val); - n->val = NULL; - n->valsz = 0; + free(n->val.p); + n->val.p = NULL; + n->val.sz = 0; } if (NULL == string) @@ -1563,17 +1594,17 @@ roff_setstrn(struct roffstr **r, const char *name, siz */ newch = stringsz + (multiline ? 2u : 1u); - if (NULL == n->val) { - n->val = mandoc_malloc(newch); - *n->val = '\0'; + if (NULL == n->val.p) { + n->val.p = mandoc_malloc(newch); + *n->val.p = '\0'; oldch = 0; } else { - oldch = n->valsz; - n->val = mandoc_realloc(n->val, oldch + newch); + oldch = n->val.sz; + n->val.p = mandoc_realloc(n->val.p, oldch + newch); } /* Skip existing content in the destination buffer. */ - c = n->val + (int)oldch; + c = n->val.p + (int)oldch; /* Append new content to the destination buffer. */ i = 0; @@ -1592,35 +1623,33 @@ roff_setstrn(struct roffstr **r, const char *name, siz *c++ = '\n'; *c = '\0'; - n->valsz = (int)(c - n->val); + n->val.sz = (int)(c - n->val.p); } static const char * roff_getstrn(const struct roff *r, const char *name, size_t len) { - const struct roffstr *n; + const struct roffkv *n; for (n = r->strtab; n; n = n->next) - if (0 == strncmp(name, n->key, len) && - '\0' == n->key[(int)len]) - return(n->val); + if (0 == strncmp(name, n->key.p, len) && + '\0' == n->key.p[(int)len]) + return(n->val.p); return(NULL); } static void -roff_freestr(struct roffstr **r) +roff_freestr(struct roffkv *r) { - struct roffstr *n, *nn; + struct roffkv *n, *nn; - for (n = *r; n; n = nn) { - free(n->key); - free(n->val); + for (n = r; n; n = nn) { + free(n->key.p); + free(n->val.p); nn = n->next; free(n); } - - *r = NULL; } const struct tbl_span * @@ -1637,13 +1666,6 @@ roff_eqn(const struct roff *r) return(r->last_eqn ? &r->last_eqn->eqn : NULL); } -char -roff_eqndelim(const struct roff *r) -{ - - return('\0'); -} - /* * Duplicate an input string, making the appropriate character * conversations (as stipulated by `tr') along the way. @@ -1652,13 +1674,13 @@ roff_eqndelim(const struct roff *r) char * roff_strdup(const struct roff *r, const char *p) { - const struct roffstr *cp; + const struct roffkv *cp; char *res; const char *pp; size_t ssz, sz; enum mandoc_esc esc; - if (NULL == r->chrtab) + if (NULL == r->xmbtab && NULL == r->xtab) return(mandoc_strdup(p)); else if ('\0' == *p) return(mandoc_strdup("")); @@ -1675,9 +1697,22 @@ roff_strdup(const struct roff *r, const char *p) ssz = 0; while ('\0' != *p) { + if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) { + sz = r->xtab[(int)*p].sz; + res = mandoc_realloc(res, ssz + sz + 1); + memcpy(res + ssz, r->xtab[(int)*p].p, sz); + ssz += sz; + p++; + continue; + } else if ('\\' != *p) { + res = mandoc_realloc(res, ssz + 2); + res[ssz++] = *p++; + continue; + } + /* Search for term matches. */ - for (cp = r->chrtab; cp; cp = cp->next) - if (0 == strncmp(p, cp->key, cp->keysz)) + for (cp = r->xmbtab; cp; cp = cp->next) + if (0 == strncmp(p, cp->key.p, cp->key.sz)) break; if (NULL != cp) { @@ -1686,43 +1721,37 @@ roff_strdup(const struct roff *r, const char *p) * Append the match to the array and move * forward by its keysize. */ - res = mandoc_realloc(res, ssz + cp->valsz + 1); - memcpy(res + ssz, cp->val, cp->valsz); - ssz += cp->valsz; - p += (int)cp->keysz; + res = mandoc_realloc + (res, ssz + cp->val.sz + 1); + memcpy(res + ssz, cp->val.p, cp->val.sz); + ssz += cp->val.sz; + p += (int)cp->key.sz; continue; } - if ('\\' == *p) { - /* - * Handle escapes carefully: we need to copy - * over just the escape itself, or else we might - * do replacements within the escape itself. - * Make sure to pass along the bogus string. - */ - pp = p++; - esc = mandoc_escape(&p, NULL, NULL); - if (ESCAPE_ERROR == esc) { - sz = strlen(pp); - res = mandoc_realloc(res, ssz + sz + 1); - memcpy(res + ssz, pp, sz); - break; - } - /* - * We bail out on bad escapes. - * No need to warn: we already did so when - * roff_res() was called. - */ - sz = (int)(p - pp); + /* + * Handle escapes carefully: we need to copy + * over just the escape itself, or else we might + * do replacements within the escape itself. + * Make sure to pass along the bogus string. + */ + pp = p++; + esc = mandoc_escape(&p, NULL, NULL); + if (ESCAPE_ERROR == esc) { + sz = strlen(pp); res = mandoc_realloc(res, ssz + sz + 1); memcpy(res + ssz, pp, sz); - ssz += sz; - continue; + break; } - - /* Just append the charater. */ - res = mandoc_realloc(res, ssz + 2); - res[ssz++] = *p++; + /* + * We bail out on bad escapes. + * No need to warn: we already did so when + * roff_res() was called. + */ + sz = (int)(p - pp); + res = mandoc_realloc(res, ssz + sz + 1); + memcpy(res + ssz, pp, sz); + ssz += sz; } res[(int)ssz] = '\0';