=================================================================== RCS file: /cvs/mandoc/roff.c,v retrieving revision 1.200 retrieving revision 1.206 diff -u -p -r1.200 -r1.206 --- mandoc/roff.c 2014/03/20 02:57:28 1.200 +++ mandoc/roff.c 2014/04/08 01:37:27 1.206 @@ -1,4 +1,4 @@ -/* $Id: roff.c,v 1.200 2014/03/20 02:57:28 schwarze Exp $ */ +/* $Id: roff.c,v 1.206 2014/04/08 01:37:27 schwarze Exp $ */ /* * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons * Copyright (c) 2010-2014 Ingo Schwarze @@ -26,6 +26,7 @@ #include #include "mandoc.h" +#include "mandoc_aux.h" #include "libroff.h" #include "libmandoc.h" @@ -61,6 +62,7 @@ enum rofft { ROFF_ns, ROFF_ps, ROFF_rm, + ROFF_rr, ROFF_so, ROFF_ta, ROFF_tr, @@ -179,6 +181,8 @@ static enum rofferr roff_cond_text(ROFF_ARGS); static enum rofferr roff_cond_sub(ROFF_ARGS); static enum rofferr roff_ds(ROFF_ARGS); static int roff_evalcond(const char *, int *); +static int roff_evalnum(const char *, int *, int *, int); +static int roff_evalpar(const char *, int *, int *); static int roff_evalstrcond(const char *, int *); static void roff_free1(struct roff *); static void roff_freereg(struct roffreg *); @@ -201,6 +205,7 @@ static enum rofferr roff_parsetext(char **, size_t *, static enum rofferr roff_res(struct roff *, char **, size_t *, int, int); static enum rofferr roff_rm(ROFF_ARGS); +static enum rofferr roff_rr(ROFF_ARGS); static void roff_setstr(struct roff *, const char *, const char *, int); static void roff_setstrn(struct roffkv **, const char *, @@ -250,6 +255,7 @@ static struct roffmac roffs[ROFF_MAX] = { { "ns", roff_line_ignore, NULL, NULL, 0, NULL }, { "ps", roff_line_ignore, NULL, NULL, 0, NULL }, { "rm", roff_rm, NULL, NULL, 0, NULL }, + { "rr", roff_rr, NULL, NULL, 0, NULL }, { "so", roff_so, NULL, NULL, 0, NULL }, { "ta", roff_line_ignore, NULL, NULL, 0, NULL }, { "tr", roff_tr, NULL, NULL, 0, NULL }, @@ -479,57 +485,77 @@ roff_alloc(struct mparse *parse, int options) } /* - * In the current line, expand 
user-defined strings ("\*") - * and references to number registers ("\n"). - * Also check the syntax of other escape sequences. + * In the current line, expand escape sequences that tend to get + * used in numerical expressions and conditional requests. + * Also check the syntax of the remaining escape sequences. */ static enum rofferr roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos) { char ubuf[12]; /* buffer to print the number */ + const char *start; /* start of the string to process */ const char *stesc; /* start of an escape sequence ('\\') */ const char *stnam; /* start of the name, after "[(*" */ const char *cp; /* end of the name, e.g. before ']' */ const char *res; /* the string to be substituted */ char *nbuf; /* new buffer to copy bufp to */ - size_t nsz; /* size of the new buffer */ size_t maxl; /* expected length of the escape name */ size_t naml; /* actual length of the escape name */ + size_t ressz; /* size of the replacement string */ int expand_count; /* to avoid infinite loops */ + int npos; /* position in numeric expression */ + int irc; /* return code from roff_evalnum() */ + char term; /* character terminating the escape */ expand_count = 0; + start = *bufp + pos; + stesc = strchr(start, '\0') - 1; + while (stesc-- > start) { -again: - cp = *bufp + pos; - while (NULL != (cp = strchr(cp, '\\'))) { - stesc = cp++; + /* Search backwards for the next backslash. */ - /* - * The second character must be an asterisk or an n. - * If it isn't, skip it anyway: It is escaped, - * so it can't start another escape sequence. - */ + if ('\\' != *stesc) + continue; - if ('\0' == *cp) - return(ROFF_CONT); + /* If it is escaped, skip it. */ + for (cp = stesc - 1; cp >= start; cp--) + if ('\\' != *cp) + break; + + if (0 == (stesc - cp) % 2) { + stesc = cp; + continue; + } + + /* Decide whether to expand or to check only. 
*/ + + term = '\0'; + cp = stesc + 1; switch (*cp) { case ('*'): res = NULL; break; + case ('B'): + /* FALLTHROUGH */ + case ('w'): + term = cp[1]; + /* FALLTHROUGH */ case ('n'): res = ubuf; break; default: - if (ESCAPE_ERROR != mandoc_escape(&cp, NULL, NULL)) - continue; - mandoc_msg - (MANDOCERR_BADESCAPE, r->parse, - ln, (int)(stesc - *bufp), NULL); - return(ROFF_CONT); + if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL)) + mandoc_msg(MANDOCERR_BADESCAPE, r->parse, + ln, (int)(stesc - *bufp), NULL); + continue; } - cp++; + if (EXPAND_LIMIT < ++expand_count) { + mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, + ln, (int)(stesc - *bufp), NULL); + return(ROFF_IGN); + } /* * The third character decides the length @@ -537,20 +563,27 @@ again: * Save a pointer to the name. */ - switch (*cp) { - case ('\0'): - return(ROFF_CONT); - case ('('): - cp++; - maxl = 2; - break; - case ('['): - cp++; + if ('\0' == term) { + switch (*++cp) { + case ('\0'): + maxl = 0; + break; + case ('('): + cp++; + maxl = 2; + break; + case ('['): + cp++; + term = ']'; + maxl = 0; + break; + default: + maxl = 1; + break; + } + } else { + cp += 2; maxl = 0; - break; - default: - maxl = 1; - break; } stnam = cp; @@ -562,10 +595,12 @@ again: (MANDOCERR_BADESCAPE, r->parse, ln, (int)(stesc - *bufp), NULL); - return(ROFF_CONT); + break; } - if (0 == maxl && ']' == *cp) + if (0 == maxl && *cp == term) { + cp++; break; + } } /* @@ -573,11 +608,26 @@ again: * undefined, resume searching for escapes. */ - if (NULL == res) + switch (stesc[1]) { + case ('*'): res = roff_getstrn(r, stnam, naml); - else + break; + case ('B'): + npos = 0; + irc = roff_evalnum(stnam, &npos, NULL, 0); + ubuf[0] = irc && stnam + npos + 1 == cp + ? 
'1' : '0'; + ubuf[1] = '\0'; + break; + case ('n'): snprintf(ubuf, sizeof(ubuf), "%d", roff_getregn(r, stnam, naml)); + break; + case ('w'): + snprintf(ubuf, sizeof(ubuf), "%d", + 24 * (int)naml); + break; + } if (NULL == res) { mandoc_msg @@ -585,29 +635,23 @@ again: ln, (int)(stesc - *bufp), NULL); res = ""; } + ressz = strlen(res); /* Replace the escape sequence by the string. */ - pos = stesc - *bufp; + *szp += ressz + 1; + nbuf = mandoc_malloc(*szp); - nsz = *szp + strlen(res) + 1; - nbuf = mandoc_malloc(nsz); - strlcpy(nbuf, *bufp, (size_t)(stesc - *bufp + 1)); - strlcat(nbuf, res, nsz); - strlcat(nbuf, cp + (maxl ? 0 : 1), nsz); + strlcat(nbuf, res, *szp); + strlcat(nbuf, cp, *szp); - free(*bufp); + /* Prepare for the next replacement. */ + start = nbuf + pos; + stesc = nbuf + (stesc - *bufp) + ressz; + free(*bufp); *bufp = nbuf; - *szp = nsz; - - if (EXPAND_LIMIT >= ++expand_count) - goto again; - - /* Just leave the string unexpanded. */ - mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL); - return(ROFF_IGN); } return(ROFF_CONT); } @@ -655,11 +699,7 @@ roff_parsetext(char **bufp, size_t *szp, int pos, int /* Spring the input line trap. */ if (1 == roffit_lines) { - isz = asprintf(&p, "%s\n.%s", *bufp, roffit_macro); - if (-1 == isz) { - perror(NULL); - exit((int)MANDOCLEVEL_SYSERR); - } + isz = mandoc_asprintf(&p, "%s\n.%s", *bufp, roffit_macro); free(*bufp); *bufp = p; *szp = isz + 1; @@ -1107,18 +1147,27 @@ roff_cond_text(ROFF_ARGS) return(rr ? ROFF_CONT : ROFF_IGN); } +/* + * Parse a single signed integer number. Stop at the first non-digit. + * If there is at least one digit, return success and advance the + * parse point, else return failure and let the parse point unchanged. + * Ignore overflows, treat them just like the C language. 
+ */ static int roff_getnum(const char *v, int *pos, int *res) { - int p, n; + int myres, n, p; + if (NULL == res) + res = &myres; + p = *pos; n = v[p] == '-'; if (n) p++; for (*res = 0; isdigit((unsigned char)v[p]); p++) - *res += 10 * *res + v[p] - '0'; + *res = 10 * *res + v[p] - '0'; if (p == *pos + n) return 0; @@ -1129,34 +1178,6 @@ roff_getnum(const char *v, int *pos, int *res) return 1; } -static int -roff_getop(const char *v, int *pos, char *res) -{ - int e; - - *res = v[*pos]; - e = v[*pos + 1] == '='; - - switch (*res) { - case '=': - break; - case '>': - if (e) - *res = 'g'; - break; - case '<': - if (e) - *res = 'l'; - break; - default: - return(0); - } - - *pos += 1 + e; - - return(*res); -} - /* * Evaluate a string comparison condition. * The first character is the delimiter. @@ -1200,11 +1221,14 @@ out: return(match); } +/* + * Evaluate an optionally negated single character, numerical, + * or string condition. + */ static int roff_evalcond(const char *v, int *pos) { - int wanttrue, lh, rh; - char op; + int wanttrue, number; if ('!' == v[*pos]) { wanttrue = 0; @@ -1233,27 +1257,10 @@ roff_evalcond(const char *v, int *pos) break; } - if (!roff_getnum(v, pos, &lh)) + if (roff_evalnum(v, pos, &number, 0)) + return((number > 0) == wanttrue); + else return(roff_evalstrcond(v, pos) == wanttrue); - if (!roff_getop(v, pos, &op)) - return((lh > 0) == wanttrue); - if (!roff_getnum(v, pos, &rh)) - return(0); - - switch (op) { - case 'g': - return((lh >= rh) == wanttrue); - case 'l': - return((lh <= rh) == wanttrue); - case '=': - return((lh == rh) == wanttrue); - case '>': - return((lh > rh) == wanttrue); - case '<': - return((lh < rh) == wanttrue); - default: - return(0); - } } /* ARGSUSED */ @@ -1371,6 +1378,204 @@ roff_ds(ROFF_ARGS) return(ROFF_IGN); } +/* + * Parse a single operator, one or two characters long. + * If the operator is recognized, return success and advance the + * parse point, else return failure and let the parse point unchanged. 
+ */ +static int +roff_getop(const char *v, int *pos, char *res) +{ + + *res = v[*pos]; + + switch (*res) { + case ('+'): + /* FALLTHROUGH */ + case ('-'): + /* FALLTHROUGH */ + case ('*'): + /* FALLTHROUGH */ + case ('/'): + /* FALLTHROUGH */ + case ('%'): + /* FALLTHROUGH */ + case ('&'): + /* FALLTHROUGH */ + case (':'): + break; + case '<': + switch (v[*pos + 1]) { + case ('='): + *res = 'l'; + (*pos)++; + break; + case ('>'): + *res = '!'; + (*pos)++; + break; + case ('?'): + *res = 'i'; + (*pos)++; + break; + default: + break; + } + break; + case '>': + switch (v[*pos + 1]) { + case ('='): + *res = 'g'; + (*pos)++; + break; + case ('?'): + *res = 'a'; + (*pos)++; + break; + default: + break; + } + break; + case '=': + if ('=' == v[*pos + 1]) + (*pos)++; + break; + default: + return(0); + } + (*pos)++; + + return(*res); +} + +/* + * Evaluate either a parenthesized numeric expression + * or a single signed integer number. + */ +static int +roff_evalpar(const char *v, int *pos, int *res) +{ + + if ('(' != v[*pos]) + return(roff_getnum(v, pos, res)); + + (*pos)++; + if ( ! roff_evalnum(v, pos, res, 1)) + return(0); + + /* + * Omission of the closing parenthesis + * is an error in validation mode, + * but ignored in evaluation mode. + */ + + if (')' == v[*pos]) + (*pos)++; + else if (NULL == res) + return(0); + + return(1); +} + +/* + * Evaluate a complete numeric expression. + * Proceed left to right, there is no concept of precedence. + */ +static int +roff_evalnum(const char *v, int *pos, int *res, int skipwhite) +{ + int mypos, operand2; + char operator; + + if (NULL == pos) { + mypos = 0; + pos = &mypos; + } + + if (skipwhite) + while (isspace((unsigned char)v[*pos])) + (*pos)++; + + if ( ! roff_evalpar(v, pos, res)) + return(0); + + while (1) { + if (skipwhite) + while (isspace((unsigned char)v[*pos])) + (*pos)++; + + if ( ! roff_getop(v, pos, &operator)) + break; + + if (skipwhite) + while (isspace((unsigned char)v[*pos])) + (*pos)++; + + if ( ! 
roff_evalpar(v, pos, &operand2)) + return(0); + + if (skipwhite) + while (isspace((unsigned char)v[*pos])) + (*pos)++; + + if (NULL == res) + continue; + + switch (operator) { + case ('+'): + *res += operand2; + break; + case ('-'): + *res -= operand2; + break; + case ('*'): + *res *= operand2; + break; + case ('/'): + *res /= operand2; + break; + case ('%'): + *res %= operand2; + break; + case ('<'): + *res = *res < operand2; + break; + case ('>'): + *res = *res > operand2; + break; + case ('l'): + *res = *res <= operand2; + break; + case ('g'): + *res = *res >= operand2; + break; + case ('='): + *res = *res == operand2; + break; + case ('!'): + *res = *res != operand2; + break; + case ('&'): + *res = *res && operand2; + break; + case (':'): + *res = *res || operand2; + break; + case ('i'): + if (operand2 < *res) + *res = operand2; + break; + case ('a'): + if (operand2 > *res) + *res = operand2; + break; + default: + abort(); + } + } + return(1); +} + void roff_setreg(struct roff *r, const char *name, int val, char sign) { @@ -1480,13 +1685,11 @@ roff_freereg(struct roffreg *reg) } } -/* ARGSUSED */ static enum rofferr roff_nr(ROFF_ARGS) { const char *key; char *val; - size_t sz; int iv; char sign; @@ -1497,11 +1700,34 @@ roff_nr(ROFF_ARGS) if ('+' == sign || '-' == sign) val++; - sz = strspn(val, "0123456789"); - iv = sz ? mandoc_strntoi(val, sz, 10) : 0; + if (roff_evalnum(val, NULL, &iv, 0)) + roff_setreg(r, key, iv, sign); - roff_setreg(r, key, iv, sign); + return(ROFF_IGN); +} +static enum rofferr +roff_rr(ROFF_ARGS) +{ + struct roffreg *reg, **prev; + const char *name; + char *cp; + + cp = *bufp + pos; + name = roff_getname(r, &cp, ln, pos); + + prev = &r->regtab; + while (1) { + reg = *prev; + if (NULL == reg || !strcmp(name, reg->key.p)) + break; + prev = &reg->next; + } + if (NULL != reg) { + *prev = reg->next; + free(reg->key.p); + free(reg); + } return(ROFF_IGN); }