=================================================================== RCS file: /cvs/mandoc/roff.c,v retrieving revision 1.328 retrieving revision 1.342 diff -u -p -r1.328 -r1.342 --- mandoc/roff.c 2018/04/11 17:11:13 1.328 +++ mandoc/roff.c 2018/10/25 01:32:40 1.342 @@ -1,4 +1,4 @@ -/* $Id: roff.c,v 1.328 2018/04/11 17:11:13 schwarze Exp $ */ +/* $Id: roff.c,v 1.342 2018/10/25 01:32:40 schwarze Exp $ */ /* * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons * Copyright (c) 2010-2015, 2017, 2018 Ingo Schwarze @@ -85,10 +85,21 @@ struct roffreq { char name[]; }; +/* + * A macro processing context. + * More than one is needed when macro calls are nested. + */ +struct mctx { + char **argv; + int argc; + int argsz; +}; + struct roff { struct mparse *parse; /* parse point */ struct roff_man *man; /* mdoc or man parser */ struct roffnode *last; /* leaf of stack */ + struct mctx *mstack; /* stack of macro contexts */ int *rstack; /* stack of inverted `ie' values */ struct ohash *reqtab; /* request lookup table */ struct roffreg *regtab; /* number registers */ @@ -104,10 +115,11 @@ struct roff { struct eqn_node *eqn; /* active equation parser */ int eqn_inline; /* current equation is inline */ int options; /* parse options */ + int mstacksz; /* current size of mstack */ + int mstackpos; /* position in mstack */ int rstacksz; /* current size limit of rstack */ int rstackpos; /* position in rstack */ int format; /* current file in mdoc or man format */ - int argc; /* number of args of the last macro */ char control; /* control character */ char escape; /* escape character */ }; @@ -131,7 +143,7 @@ struct roffnode { int pos, /* current pos in buffer */ \ int *offs /* reset offset of buffer data */ -typedef enum rofferr (*roffproc)(ROFF_ARGS); +typedef int (*roffproc)(ROFF_ARGS); struct roffmac { roffproc proc; /* process new macro */ @@ -151,26 +163,27 @@ struct predef { /* --- function prototypes ------------------------------------------------ */ -static void roffnode_cleanscope(struct roff *); -static void roffnode_pop(struct roff *); +static int roffnode_cleanscope(struct roff *); +static int roffnode_pop(struct roff *); static void roffnode_push(struct roff *, enum roff_tok, const char *, int, int); static void roff_addtbl(struct roff_man *, struct tbl_node *); -static enum rofferr roff_als(ROFF_ARGS); -static enum rofferr roff_block(ROFF_ARGS); -static enum rofferr roff_block_text(ROFF_ARGS); -static enum rofferr roff_block_sub(ROFF_ARGS); -static enum rofferr roff_br(ROFF_ARGS); -static enum rofferr roff_cblock(ROFF_ARGS); -static enum rofferr roff_cc(ROFF_ARGS); -static void roff_ccond(struct roff *, int, int); -static enum rofferr roff_cond(ROFF_ARGS); -static enum rofferr roff_cond_text(ROFF_ARGS); -static enum rofferr roff_cond_sub(ROFF_ARGS); -static enum rofferr roff_ds(ROFF_ARGS); -static enum rofferr roff_ec(ROFF_ARGS); -static enum rofferr roff_eo(ROFF_ARGS); -static enum rofferr roff_eqndelim(struct roff *, struct buf *, int); +static int roff_als(ROFF_ARGS); +static int roff_block(ROFF_ARGS); +static int roff_block_text(ROFF_ARGS); +static int roff_block_sub(ROFF_ARGS); +static int roff_br(ROFF_ARGS); +static int roff_cblock(ROFF_ARGS); +static int roff_cc(ROFF_ARGS); +static int roff_ccond(struct roff *, int, int); +static int roff_char(ROFF_ARGS); +static int roff_cond(ROFF_ARGS); +static int roff_cond_text(ROFF_ARGS); +static int roff_cond_sub(ROFF_ARGS); +static int roff_ds(ROFF_ARGS); +static int roff_ec(ROFF_ARGS); +static int roff_eo(ROFF_ARGS); +static int roff_eqndelim(struct roff *, struct buf *, int); static int roff_evalcond(struct roff *r, int, char *, int *); static int roff_evalnum(struct roff *, int, const char *, int *, int *, int); @@ -191,39 +204,42 @@ static const char *roff_getstrn(struct roff *, const char *, size_t, int *); static int roff_hasregn(const struct roff *, const char *, size_t); -static enum rofferr roff_insec(ROFF_ARGS); -static enum rofferr roff_it(ROFF_ARGS); -static enum rofferr roff_line_ignore(ROFF_ARGS); +static int roff_insec(ROFF_ARGS); +static int roff_it(ROFF_ARGS); +static int roff_line_ignore(ROFF_ARGS); static void roff_man_alloc1(struct roff_man *); static void roff_man_free1(struct roff_man *); -static enum rofferr roff_manyarg(ROFF_ARGS); -static enum rofferr roff_nr(ROFF_ARGS); -static enum rofferr roff_onearg(ROFF_ARGS); +static int roff_manyarg(ROFF_ARGS); +static int roff_nop(ROFF_ARGS); +static int roff_nr(ROFF_ARGS); +static int roff_onearg(ROFF_ARGS); static enum roff_tok roff_parse(struct roff *, char *, int *, int, int); -static enum rofferr roff_parsetext(struct roff *, struct buf *, +static int roff_parsetext(struct roff *, struct buf *, int, int *); -static enum rofferr roff_renamed(ROFF_ARGS); -static enum rofferr roff_res(struct roff *, struct buf *, int, int); -static enum rofferr roff_rm(ROFF_ARGS); -static enum rofferr roff_rn(ROFF_ARGS); -static enum rofferr roff_rr(ROFF_ARGS); +static int roff_renamed(ROFF_ARGS); +static int roff_res(struct roff *, struct buf *, int, int); +static int roff_return(ROFF_ARGS); +static int roff_rm(ROFF_ARGS); +static int roff_rn(ROFF_ARGS); +static int roff_rr(ROFF_ARGS); static void roff_setregn(struct roff *, const char *, size_t, int, char, int); static void roff_setstr(struct roff *, const char *, const char *, int); static void roff_setstrn(struct roffkv **, const char *, size_t, const char *, size_t, int); -static enum rofferr roff_so(ROFF_ARGS); -static enum rofferr roff_tr(ROFF_ARGS); -static enum rofferr roff_Dd(ROFF_ARGS); -static enum rofferr roff_TE(ROFF_ARGS); -static enum rofferr roff_TS(ROFF_ARGS); -static enum rofferr roff_EQ(ROFF_ARGS); -static enum rofferr roff_EN(ROFF_ARGS); -static enum rofferr roff_T_(ROFF_ARGS); -static enum rofferr roff_unsupp(ROFF_ARGS); -static enum rofferr roff_userdef(ROFF_ARGS); +static int roff_shift(ROFF_ARGS); +static int roff_so(ROFF_ARGS); +static int roff_tr(ROFF_ARGS); +static int roff_Dd(ROFF_ARGS); +static int roff_TE(ROFF_ARGS); +static int roff_TS(ROFF_ARGS); +static int roff_EQ(ROFF_ARGS); +static int roff_EN(ROFF_ARGS); +static int roff_T_(ROFF_ARGS); +static int roff_unsupp(ROFF_ARGS); +static int roff_userdef(ROFF_ARGS); /* --- constant data ------------------------------------------------------ */ @@ -326,6 +342,7 @@ const char *__roff_name[MAN_MAX + 1] = { "Dx", "%Q", "%U", "Ta", NULL, "TH", "SH", "SS", "TP", + "TQ", "LP", "PP", "P", "IP", "HP", "SM", "SB", "BI", "IB", "BR", "RB", "R", @@ -333,7 +350,8 @@ const char *__roff_name[MAN_MAX + 1] = { "nf", "fi", "RE", "RS", "DT", "UC", "PD", "AT", "in", - "OP", "EX", "EE", "UR", + "SY", "YS", "OP", + "EX", "EE", "UR", "UE", "MT", "ME", NULL }; const char *const *roff_name = __roff_name; @@ -380,7 +398,7 @@ static struct roffmac roffs[TOKEN_NONE] = { { roff_insec, NULL, NULL, 0 }, /* cf */ { roff_line_ignore, NULL, NULL, 0 }, /* cflags */ { roff_line_ignore, NULL, NULL, 0 }, /* ch */ - { roff_unsupp, NULL, NULL, 0 }, /* char */ + { roff_char, NULL, NULL, 0 }, /* char */ { roff_unsupp, NULL, NULL, 0 }, /* chop */ { roff_line_ignore, NULL, NULL, 0 }, /* class */ { roff_insec, NULL, NULL, 0 }, /* close */ @@ -490,7 +508,7 @@ static struct roffmac roffs[TOKEN_NONE] = { { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */ { roff_unsupp, NULL, NULL, 0 }, /* nm */ { roff_unsupp, NULL, NULL, 0 }, /* nn */ - { roff_unsupp, NULL, NULL, 0 }, /* nop */ + { roff_nop, NULL, NULL, 0 }, /* nop */ { roff_nr, NULL, NULL, 0 }, /* nr */ { roff_unsupp, NULL, NULL, 0 }, /* nrf */ { roff_line_ignore, NULL, NULL, 0 }, /* nroff */ @@ -519,7 +537,7 @@ static struct roffmac roffs[TOKEN_NONE] = { { roff_unsupp, NULL, NULL, 0 }, /* rchar */ { roff_line_ignore, NULL, NULL, 0 }, /* rd */ { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */ - { roff_unsupp, NULL, NULL, 0 }, /* return */ + { roff_return, NULL, NULL, 0 }, /* return */ { roff_unsupp, NULL, NULL, 0 }, /* rfschar */ { roff_line_ignore, NULL, NULL, 0 }, /* rhang */ { roff_rm, NULL, NULL, 0 }, /* rm */ @@ -531,7 +549,7 @@ static struct roffmac roffs[TOKEN_NONE] = { { roff_unsupp, NULL, NULL, 0 }, /* schar */ { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */ { roff_line_ignore, NULL, NULL, 0 }, /* shc */ - { roff_unsupp, NULL, NULL, 0 }, /* shift */ + { roff_shift, NULL, NULL, 0 }, /* shift */ { roff_line_ignore, NULL, NULL, 0 }, /* sizes */ { roff_so, NULL, NULL, 0 }, /* so */ { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */ @@ -573,7 +591,7 @@ static struct roffmac roffs[TOKEN_NONE] = { { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */ { roff_line_ignore, NULL, NULL, 0 }, /* watchn */ { roff_unsupp, NULL, NULL, 0 }, /* wh */ - { roff_unsupp, NULL, NULL, 0 }, /* while */ + { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/ { roff_insec, NULL, NULL, 0 }, /* write */ { roff_insec, NULL, NULL, 0 }, /* writec */ { roff_insec, NULL, NULL, 0 }, /* writem */ @@ -657,18 +675,19 @@ roffhash_find(struct ohash *htab, const char *name, si * Pop the current node off of the stack of roff instructions currently * pending. */ -static void +static int roffnode_pop(struct roff *r) { struct roffnode *p; + int inloop; - assert(r->last); p = r->last; - - r->last = r->last->parent; + inloop = p->tok == ROFF_while; + r->last = p->parent; free(p->name); free(p->end); free(p); + return inloop; } /* @@ -711,6 +730,9 @@ roff_free1(struct roff *r) eqn_free(r->last_eqn); r->last_eqn = r->eqn = NULL; + while (r->mstackpos >= 0) + roff_userret(r); + while (r->last) roffnode_pop(r); @@ -750,7 +772,12 @@ roff_reset(struct roff *r) void roff_free(struct roff *r) { + int i; + roff_free1(r); + for (i = 0; i < r->mstacksz; i++) + free(r->mstack[i].argv); + free(r->mstack); roffhash_free(r->reqtab); free(r); } @@ -765,6 +792,7 @@ roff_alloc(struct mparse *parse, int options) r->reqtab = roffhash_alloc(0, ROFF_RENAMED); r->options = options; r->format = options & (MPARSE_MDOC | MPARSE_MAN); + r->mstackpos = -1; r->rstackpos = -1; r->escape = '\\'; return r; @@ -1118,9 +1146,10 @@ deroff(char **dest, const struct roff_node *n) * used in numerical expressions and conditional requests. * Also check the syntax of the remaining escape sequences. */ -static enum rofferr +static int roff_res(struct roff *r, struct buf *buf, int ln, int pos) { + struct mctx *ctx; /* current macro call context */ char ubuf[24]; /* buffer to print the number */ struct roff_node *n; /* used for header comments */ const char *start; /* start of the string to process */ @@ -1132,11 +1161,14 @@ roff_res(struct roff *r, struct buf *buf, int ln, int char *nbuf; /* new buffer to copy buf->buf to */ size_t maxl; /* expected length of the escape name */ size_t naml; /* actual length of the escape name */ + size_t asz; /* length of the replacement */ + size_t rsz; /* length of the rest of the string */ enum mandoc_esc esc; /* type of the escape sequence */ int inaml; /* length returned from mandoc_escape() */ int expand_count; /* to avoid infinite loops */ int npos; /* position in numeric expression */ int arg_complete; /* argument not interrupted by eol */ + int quote_args; /* true for \\$@, false for \\$* */ int done; /* no more input available */ int deftype; /* type of definition to paste */ int rcsid; /* kind of RCS id seen */ @@ -1202,9 +1234,17 @@ roff_res(struct roff *r, struct buf *buf, int ln, int r->man->next = ROFF_NEXT_SIBLING; } - /* Discard comments. */ + /* Line continuation with comment. */ - while (stesc > start && stesc[-1] == ' ') + if (stesc[1] == '#') { + *stesc = '\0'; + return ROFF_IGN | ROFF_APPEND; + } + + /* Discard normal comments. */ + + while (stesc > start && stesc[-1] == ' ' && + (stesc == start + 1 || stesc[-2] != '\\')) stesc--; *stesc = '\0'; break; @@ -1256,7 +1296,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int if (done) continue; else - return ROFF_APPEND; + return ROFF_IGN | ROFF_APPEND; } /* Decide whether to expand or to check only. */ @@ -1265,6 +1305,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int cp = stesc + 1; switch (*cp) { case '*': + case '$': res = NULL; break; case 'B': @@ -1366,8 +1407,77 @@ roff_res(struct roff *r, struct buf *buf, int ln, int if (arg_complete) { deftype = ROFFDEF_USER | ROFFDEF_PRE; res = roff_getstrn(r, stnam, naml, &deftype); + + /* + * If not overriden, let \*(.T + * through to the formatters. + */ + + if (res == NULL && naml == 2 && + stnam[0] == '.' && stnam[1] == 'T') { + roff_setstrn(&r->strtab, + ".T", 2, NULL, 0, 0); + stesc--; + continue; + } } break; + case '$': + if (r->mstackpos < 0) { + mandoc_vmsg(MANDOCERR_ARG_UNDEF, + r->parse, ln, (int)(stesc - buf->buf), + "%.3s", stesc); + break; + } + ctx = r->mstack + r->mstackpos; + npos = stesc[2] - '1'; + if (npos >= 0 && npos <= 8) { + res = npos < ctx->argc ? + ctx->argv[npos] : ""; + break; + } + if (stesc[2] == '*') + quote_args = 0; + else if (stesc[2] == '@') + quote_args = 1; + else { + mandoc_vmsg(MANDOCERR_ARG_NONUM, + r->parse, ln, (int)(stesc - buf->buf), + "%.3s", stesc); + break; + } + asz = 0; + for (npos = 0; npos < ctx->argc; npos++) { + if (npos) + asz++; /* blank */ + if (quote_args) + asz += 2; /* quotes */ + asz += strlen(ctx->argv[npos]); + } + if (asz != 3) { + rsz = buf->sz - (stesc - buf->buf) - 3; + if (asz < 3) + memmove(stesc + asz, stesc + 3, rsz); + buf->sz += asz - 3; + nbuf = mandoc_realloc(buf->buf, buf->sz); + start = nbuf + pos; + stesc = nbuf + (stesc - buf->buf); + buf->buf = nbuf; + if (asz > 3) + memmove(stesc + asz, stesc + 3, rsz); + } + for (npos = 0; npos < ctx->argc; npos++) { + if (npos) + *stesc++ = ' '; + if (quote_args) + *stesc++ = '"'; + cp = ctx->argv[npos]; + while (*cp != '\0') + *stesc++ = *cp++; + if (quote_args) + *stesc++ = '"'; + } + continue; case 'B': npos = 0; ubuf[0] = arg_complete && @@ -1391,9 +1501,10 @@ roff_res(struct roff *r, struct buf *buf, int ln, int } if (res == NULL) { - mandoc_vmsg(MANDOCERR_STR_UNDEF, - r->parse, ln, (int)(stesc - buf->buf), - "%.*s", (int)naml, stnam); + if (stesc[1] == '*') + mandoc_vmsg(MANDOCERR_STR_UNDEF, + r->parse, ln, (int)(stesc - buf->buf), + "%.*s", (int)naml, stnam); res = ""; } else if (buf->sz + strlen(res) > SHRT_MAX) { mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, @@ -1420,7 +1531,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int /* * Process text streams. */ -static enum rofferr +static int roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs) { size_t sz; @@ -1486,11 +1597,11 @@ roff_parsetext(struct roff *r, struct buf *buf, int po return ROFF_CONT; } -enum rofferr +int roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs) { enum roff_tok t; - enum rofferr e; + int e; int pos; /* parse point */ int spos; /* saved parse point for messages */ int ppos; /* original offset in buf->buf */ @@ -1512,7 +1623,7 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, /* Expand some escape sequences. */ e = roff_res(r, buf, ln, pos); - if (e == ROFF_IGN || e == ROFF_APPEND) + if ((e & ROFF_MASK) == ROFF_IGN) return e; assert(e == ROFF_CONT); @@ -1529,21 +1640,22 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, if (r->last != NULL && ! ctl) { t = r->last->tok; e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs); - if (e == ROFF_IGN) + if ((e & ROFF_MASK) == ROFF_IGN) return e; - assert(e == ROFF_CONT); - } + e &= ~ROFF_MASK; + } else + e = ROFF_IGN; if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) { eqn_read(r->eqn, buf->buf + ppos); - return ROFF_IGN; + return e; } if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) { tbl_read(r->tbl, ln, buf->buf, ppos); roff_addtbl(r->man, r->tbl); - return ROFF_IGN; + return e; } if ( ! ctl) - return roff_parsetext(r, buf, pos, offs); + return roff_parsetext(r, buf, pos, offs) | e; /* Skip empty request lines. */ @@ -1611,7 +1723,26 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs); } +/* + * Internal interface function to tell the roff parser that execution + * of the current macro ended. This is required because macro + * definitions usually do not end with a .return request. + */ void +roff_userret(struct roff *r) +{ + struct mctx *ctx; + int i; + + assert(r->mstackpos >= 0); + ctx = r->mstack + r->mstackpos; + for (i = 0; i < ctx->argc; i++) + free(ctx->argv[i]); + ctx->argc = 0; + r->mstackpos--; +} + +void roff_endparse(struct roff *r) { if (r->last != NULL) @@ -1680,7 +1811,7 @@ roff_parse(struct roff *r, char *buf, int *pos, int ln /* --- handling of request blocks ----------------------------------------- */ -static enum rofferr +static int roff_cblock(ROFF_ARGS) { @@ -1720,50 +1851,51 @@ roff_cblock(ROFF_ARGS) } -static void +static int roffnode_cleanscope(struct roff *r) { + int inloop; - while (r->last) { + inloop = 0; + while (r->last != NULL) { if (--r->last->endspan != 0) break; - roffnode_pop(r); + inloop += roffnode_pop(r); } + return inloop; } -static void +static int roff_ccond(struct roff *r, int ln, int ppos) { - if (NULL == r->last) { mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "\\}"); - return; + return 0; } switch (r->last->tok) { case ROFF_el: case ROFF_ie: case ROFF_if: + case ROFF_while: break; default: mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "\\}"); - return; + return 0; } if (r->last->endspan > -1) { mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "\\}"); - return; + return 0; } - roffnode_pop(r); - roffnode_cleanscope(r); - return; + return roffnode_pop(r) + roffnode_cleanscope(r); } -static enum rofferr +static int roff_block(ROFF_ARGS) { const char *name, *value; @@ -1888,7 +2020,7 @@ roff_block(ROFF_ARGS) return ROFF_IGN; } -static enum rofferr +static int roff_block_sub(ROFF_ARGS) { enum roff_tok t; @@ -1942,7 +2074,7 @@ roff_block_sub(ROFF_ARGS) return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); } -static enum rofferr +static int roff_block_text(ROFF_ARGS) { @@ -1952,15 +2084,19 @@ roff_block_text(ROFF_ARGS) return ROFF_IGN; } -static enum rofferr +static int roff_cond_sub(ROFF_ARGS) { - enum roff_tok t; char *ep; - int rr; + int endloop, irc, rr; + enum roff_tok t; + irc = ROFF_IGN; rr = r->last->rule; - roffnode_cleanscope(r); + endloop = tok != ROFF_while ? ROFF_IGN : + rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT; + if (roffnode_cleanscope(r)) + irc |= endloop; /* * If `\}' occurs on a macro line without a preceding macro, @@ -1977,7 +2113,8 @@ roff_cond_sub(ROFF_ARGS) switch (ep[1]) { case '}': memmove(ep, ep + 2, strlen(ep + 2) + 1); - roff_ccond(r, ln, ep - buf->buf); + if (roff_ccond(r, ln, ep - buf->buf)) + irc |= endloop; break; case '\0': ++ep; @@ -1994,30 +2131,38 @@ roff_cond_sub(ROFF_ARGS) */ t = roff_parse(r, buf->buf, &pos, ln, ppos); - return t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT) - ? (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs) : rr - ? ROFF_CONT : ROFF_IGN; + irc |= t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT) ? + (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs) : + rr ? ROFF_CONT : ROFF_IGN; + return irc; } -static enum rofferr +static int roff_cond_text(ROFF_ARGS) { char *ep; - int rr; + int endloop, irc, rr; + irc = ROFF_IGN; rr = r->last->rule; - roffnode_cleanscope(r); + endloop = tok != ROFF_while ? ROFF_IGN : + rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT; + if (roffnode_cleanscope(r)) + irc |= endloop; ep = buf->buf + pos; while ((ep = strchr(ep, '\\')) != NULL) { if (*(++ep) == '}') { *ep = '&'; - roff_ccond(r, ln, ep - buf->buf - 1); + if (roff_ccond(r, ln, ep - buf->buf - 1)) + irc |= endloop; } if (*ep != '\0') ++ep; } - return rr ? ROFF_CONT : ROFF_IGN; + if (rr) + irc |= ROFF_CONT; + return irc; } /* --- handling of numeric and conditional expressions -------------------- */ @@ -2144,9 +2289,10 @@ out: static int roff_evalcond(struct roff *r, int ln, char *v, int *pos) { - char *cp, *name; - size_t sz; - int deftype, number, savepos, istrue, wanttrue; + const char *start, *end; + char *cp, *name; + size_t sz; + int deftype, len, number, savepos, istrue, wanttrue; if ('!' == v[*pos]) { wanttrue = 0; @@ -2161,12 +2307,50 @@ roff_evalcond(struct roff *r, int ln, char *v, int *po case 'o': (*pos)++; return wanttrue; - case 'c': case 'e': case 't': case 'v': (*pos)++; return !wanttrue; + case 'c': + do { + (*pos)++; + } while (v[*pos] == ' '); + + /* + * Quirk for groff compatibility: + * The horizontal tab is neither available nor unavailable. + */ + + if (v[*pos] == '\t') { + (*pos)++; + return 0; + } + + /* Printable ASCII characters are available. */ + + if (v[*pos] != '\\') { + (*pos)++; + return wanttrue; + } + + end = v + ++*pos; + switch (mandoc_escape(&end, &start, &len)) { + case ESCAPE_SPECIAL: + istrue = mchars_spec2cp(start, len) != -1; + break; + case ESCAPE_UNICODE: + istrue = 1; + break; + case ESCAPE_NUMBERED: + istrue = mchars_num2char(start, len) != -1; + break; + default: + istrue = !wanttrue; + break; + } + *pos = end - v; + return istrue == wanttrue; case 'd': case 'r': cp = v + *pos + 1; @@ -2198,14 +2382,14 @@ roff_evalcond(struct roff *r, int ln, char *v, int *po return 0; } -static enum rofferr +static int roff_line_ignore(ROFF_ARGS) { return ROFF_IGN; } -static enum rofferr +static int roff_insec(ROFF_ARGS) { @@ -2214,7 +2398,7 @@ roff_insec(ROFF_ARGS) return ROFF_IGN; } -static enum rofferr +static int roff_unsupp(ROFF_ARGS) { @@ -2223,9 +2407,10 @@ roff_unsupp(ROFF_ARGS) return ROFF_IGN; } -static enum rofferr +static int roff_cond(ROFF_ARGS) { + int irc; roffnode_push(r, tok, NULL, ln, ppos); @@ -2264,9 +2449,10 @@ roff_cond(ROFF_ARGS) * Determine scope. * If there is nothing on the line after the conditional, * not even whitespace, use next-line scope. + * Except that .while does not support next-line scope. */ - if (buf->buf[pos] == '\0') { + if (buf->buf[pos] == '\0' && tok != ROFF_while) { r->last->endspan = 2; goto out; } @@ -2298,10 +2484,13 @@ roff_cond(ROFF_ARGS) out: *offs = pos; - return ROFF_RERUN; + irc = ROFF_RERUN; + if (tok == ROFF_while) + irc |= ROFF_WHILE; + return irc; } -static enum rofferr +static int roff_ds(ROFF_ARGS) { char *string; @@ -2600,7 +2789,7 @@ roff_getregro(const struct roff *r, const char *name) switch (*name) { case '$': /* Number of arguments of the last macro evaluated. */ - return r->argc; + return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc; case 'A': /* ASCII approximation mode is always off. */ return 0; case 'g': /* Groff compatibility mode is always on. */ @@ -2690,7 +2879,7 @@ roff_freereg(struct roffreg *reg) } } -static enum rofferr +static int roff_nr(ROFF_ARGS) { char *key, *val, *step; @@ -2724,7 +2913,7 @@ roff_nr(ROFF_ARGS) return ROFF_IGN; } -static enum rofferr +static int roff_rr(ROFF_ARGS) { struct roffreg *reg, **prev; @@ -2754,7 +2943,7 @@ roff_rr(ROFF_ARGS) /* --- handler functions for roff requests -------------------------------- */ -static enum rofferr +static int roff_rm(ROFF_ARGS) { const char *name; @@ -2773,7 +2962,7 @@ roff_rm(ROFF_ARGS) return ROFF_IGN; } -static enum rofferr +static int roff_it(ROFF_ARGS) { int iv; @@ -2802,7 +2991,7 @@ roff_it(ROFF_ARGS) return ROFF_IGN; } -static enum rofferr +static int roff_Dd(ROFF_ARGS) { int mask; @@ -2832,7 +3021,7 @@ roff_Dd(ROFF_ARGS) return ROFF_CONT; } -static enum rofferr +static int roff_TE(ROFF_ARGS) { if (r->tbl == NULL) { @@ -2845,13 +3034,14 @@ roff_TE(ROFF_ARGS) free(buf->buf); buf->buf = mandoc_strdup(".sp"); buf->sz = 4; + *offs = 0; return ROFF_REPARSE; } r->tbl = NULL; return ROFF_IGN; } -static enum rofferr +static int roff_T_(ROFF_ARGS) { @@ -2867,7 +3057,7 @@ roff_T_(ROFF_ARGS) /* * Handle in-line equation delimiters. */ -static enum rofferr +static int roff_eqndelim(struct roff *r, struct buf *buf, int pos) { char *cp1, *cp2; @@ -2930,7 +3120,7 @@ roff_eqndelim(struct roff *r, struct buf *buf, int pos return ROFF_REPARSE; } -static enum rofferr +static int roff_EQ(ROFF_ARGS) { struct roff_node *n; @@ -2960,7 +3150,7 @@ roff_EQ(ROFF_ARGS) return ROFF_IGN; } -static enum rofferr +static int roff_EN(ROFF_ARGS) { if (r->eqn != NULL) { @@ -2974,7 +3164,7 @@ roff_EN(ROFF_ARGS) return ROFF_IGN; } -static enum rofferr +static int roff_TS(ROFF_ARGS) { if (r->tbl != NULL) { @@ -2991,7 +3181,7 @@ roff_TS(ROFF_ARGS) return ROFF_IGN; } -static enum rofferr +static int roff_onearg(ROFF_ARGS) { struct roff_node *n; @@ -3051,7 +3241,7 @@ roff_onearg(ROFF_ARGS) return ROFF_IGN; } -static enum rofferr +static int roff_manyarg(ROFF_ARGS) { struct roff_node *n; @@ -3074,7 +3264,7 @@ roff_manyarg(ROFF_ARGS) return ROFF_IGN; } -static enum rofferr +static int roff_als(ROFF_ARGS) { char *oldn, *newn, *end, *value; @@ -3093,7 +3283,7 @@ roff_als(ROFF_ARGS) if (oldsz == 0) return ROFF_IGN; - valsz = mandoc_asprintf(&value, ".%.*s \\$*\\\"\n", + valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n", (int)oldsz, oldn); roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0); roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); @@ -3101,7 +3291,7 @@ roff_als(ROFF_ARGS) return ROFF_IGN; } -static enum rofferr +static int roff_br(ROFF_ARGS) { if (r->man->flags & (MAN_BLINE | MAN_ELINE)) @@ -3115,7 +3305,7 @@ roff_br(ROFF_ARGS) return ROFF_IGN; } -static enum rofferr +static int roff_cc(ROFF_ARGS) { const char *p; @@ -3132,7 +3322,78 @@ roff_cc(ROFF_ARGS) return ROFF_IGN; } -static enum rofferr +static int +roff_char(ROFF_ARGS) +{ + const char *p, *kp, *vp; + size_t ksz, vsz; + int font; + + /* Parse the character to be replaced. */ + + kp = buf->buf + pos; + p = kp + 1; + if (*kp == '\0' || (*kp == '\\' && + mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) || + (*p != ' ' && *p != '\0')) { + mandoc_vmsg(MANDOCERR_CHAR_ARG, r->parse, + ln, pos, "char %s", kp); + return ROFF_IGN; + } + ksz = p - kp; + while (*p == ' ') + p++; + + /* + * If the replacement string contains a font escape sequence, + * we have to restore the font at the end. + */ + + vp = p; + vsz = strlen(p); + font = 0; + while (*p != '\0') { + if (*p++ != '\\') + continue; + switch (mandoc_escape(&p, NULL, NULL)) { + case ESCAPE_FONT: + case ESCAPE_FONTROMAN: + case ESCAPE_FONTITALIC: + case ESCAPE_FONTBOLD: + case ESCAPE_FONTBI: + case ESCAPE_FONTCW: + case ESCAPE_FONTPREV: + font++; + break; + default: + break; + } + } + if (font > 1) + mandoc_msg(MANDOCERR_CHAR_FONT, r->parse, + ln, vp - buf->buf, vp); + + /* + * Approximate the effect of .char using the .tr tables. + * XXX In groff, .char and .tr interact differently. + */ + + if (ksz == 1) { + if (r->xtab == NULL) + r->xtab = mandoc_calloc(128, sizeof(*r->xtab)); + assert((unsigned int)*kp < 128); + free(r->xtab[(int)*kp].p); + r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p, + "%s%s", vp, font ? "\fP" : ""); + } else { + roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0); + if (font) + roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1); + } + return ROFF_IGN; +} + +static int roff_ec(ROFF_ARGS) { const char *p; @@ -3149,7 +3410,7 @@ roff_ec(ROFF_ARGS) return ROFF_IGN; } -static enum rofferr +static int roff_eo(ROFF_ARGS) { r->escape = '\0'; @@ -3159,7 +3420,16 @@ roff_eo(ROFF_ARGS) return ROFF_IGN; } -static enum rofferr +static int +roff_nop(ROFF_ARGS) +{ + while (buf->buf[pos] == ' ') + pos++; + *offs = pos; + return ROFF_RERUN; +} + +static int roff_tr(ROFF_ARGS) { const char *p, *first, *second; @@ -3221,7 +3491,23 @@ roff_tr(ROFF_ARGS) return ROFF_IGN; } -static enum rofferr +/* + * Implementation of the .return request. + * There is no need to call roff_userret() from here. + * The read module will call that after rewinding the reader stack + * to the place from where the current macro was called. + */ +static int +roff_return(ROFF_ARGS) +{ + if (r->mstackpos >= 0) + return ROFF_IGN | ROFF_USERRET; + + mandoc_msg(MANDOCERR_REQ_NOMAC, r->parse, ln, ppos, "return"); + return ROFF_IGN; +} + +static int roff_rn(ROFF_ARGS) { const char *value; @@ -3271,7 +3557,40 @@ roff_rn(ROFF_ARGS) return ROFF_IGN; } -static enum rofferr +static int +roff_shift(ROFF_ARGS) +{ + struct mctx *ctx; + int levels, i; + + levels = 1; + if (buf->buf[pos] != '\0' && + roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) { + mandoc_vmsg(MANDOCERR_CE_NONUM, r->parse, + ln, pos, "shift %s", buf->buf + pos); + levels = 1; + } + if (r->mstackpos < 0) { + mandoc_msg(MANDOCERR_REQ_NOMAC, r->parse, ln, ppos, "shift"); + return ROFF_IGN; + } + ctx = r->mstack + r->mstackpos; + if (levels > ctx->argc) { + mandoc_vmsg(MANDOCERR_SHIFT, r->parse, + ln, pos, "%d, but max is %d", levels, ctx->argc); + levels = ctx->argc; + } + if (levels == 0) + return ROFF_IGN; + for (i = 0; i < levels; i++) + free(ctx->argv[i]); + ctx->argc -= levels; + for (i = 0; i < ctx->argc; i++) + ctx->argv[i] = ctx->argv[i + levels]; + return ROFF_IGN; +} + +static int roff_so(ROFF_ARGS) { char *name, *cp; @@ -3303,153 +3622,68 @@ roff_so(ROFF_ARGS) /* --- user defined strings and macros ------------------------------------ */ -static enum rofferr +static int roff_userdef(ROFF_ARGS) { - const char *arg[16], *ap; - char *cp, *n1, *n2; - int expand_count, i, ib, ie; - size_t asz, rsz; + struct mctx *ctx; + char *arg, *ap, *dst, *src; + size_t sz; - /* - * Collect pointers to macro argument strings - * and NUL-terminate them. - */ + /* Initialize a new macro stack context. */ - r->argc = 0; - cp = buf->buf + pos; - for (i = 0; i < 16; i++) { - if (*cp == '\0') - arg[i] = ""; - else { - arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos); - r->argc = i + 1; - } + if (++r->mstackpos == r->mstacksz) { + r->mstack = mandoc_recallocarray(r->mstack, + r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack)); + r->mstacksz += 8; } + ctx = r->mstack + r->mstackpos; + ctx->argsz = 0; + ctx->argc = 0; + ctx->argv = NULL; /* - * Expand macro arguments. + * Collect pointers to macro argument strings, + * NUL-terminating them and escaping quotes. */ - buf->sz = strlen(r->current_string) + 1; - n1 = n2 = cp = mandoc_malloc(buf->sz); - memcpy(n1, r->current_string, buf->sz); - expand_count = 0; - while (*cp != '\0') { - - /* Scan ahead for the next argument invocation. */ - - if (*cp++ != '\\') - continue; - if (*cp++ != '$') - continue; - if (*cp == '*') { /* \\$* inserts all arguments */ - ib = 0; - ie = r->argc - 1; - } else { /* \\$1 .. \\$9 insert one argument */ - ib = ie = *cp - '1'; - if (ib < 0 || ib > 8) - continue; + src = buf->buf + pos; + while (*src != '\0') { + if (ctx->argc == ctx->argsz) { + ctx->argsz += 8; + ctx->argv = mandoc_reallocarray(ctx->argv, + ctx->argsz, sizeof(*ctx->argv)); } - cp -= 2; - - /* - * Prevent infinite recursion. - */ - - if (cp >= n2) - expand_count = 1; - else if (++expand_count > EXPAND_LIMIT) { - mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, - ln, (int)(cp - n1), NULL); - free(buf->buf); - buf->buf = n1; - return ROFF_IGN; + arg = mandoc_getarg(r->parse, &src, ln, &pos); + sz = 1; /* For the terminating NUL. */ + for (ap = arg; *ap != '\0'; ap++) + sz += *ap == '"' ? 4 : 1; + ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz); + for (ap = arg; *ap != '\0'; ap++) { + if (*ap == '"') { + memcpy(dst, "\\(dq", 4); + dst += 4; + } else + *dst++ = *ap; } - - /* - * Determine the size of the expanded argument, - * taking escaping of quotes into account. - */ - - asz = ie > ib ? ie - ib : 0; /* for blanks */ - for (i = ib; i <= ie; i++) { - for (ap = arg[i]; *ap != '\0'; ap++) { - asz++; - if (*ap == '"') - asz += 3; - } - } - if (asz != 3) { - - /* - * Determine the size of the rest of the - * unexpanded macro, including the NUL. - */ - - rsz = buf->sz - (cp - n1) - 3; - - /* - * When shrinking, move before - * releasing the storage. - */ - - if (asz < 3) - memmove(cp + asz, cp + 3, rsz); - - /* - * Resize the storage for the macro - * and readjust the parse pointer. - */ - - buf->sz += asz - 3; - n2 = mandoc_realloc(n1, buf->sz); - cp = n2 + (cp - n1); - n1 = n2; - - /* - * When growing, make room - * for the expanded argument. - */ - - if (asz > 3) - memmove(cp + asz, cp + 3, rsz); - } - - /* Copy the expanded argument, escaping quotes. */ - - n2 = cp; - for (i = ib; i <= ie; i++) { - for (ap = arg[i]; *ap != '\0'; ap++) { - if (*ap == '"') { - memcpy(n2, "\\(dq", 4); - n2 += 4; - } else - *n2++ = *ap; - } - if (i < ie) - *n2++ = ' '; - } + *dst = '\0'; } - /* - * Replace the macro invocation - * by the expanded macro. - */ + /* Replace the macro invocation by the macro definition. */ free(buf->buf); - buf->buf = n1; + buf->buf = mandoc_strdup(r->current_string); + buf->sz = strlen(buf->buf) + 1; *offs = 0; return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ? - ROFF_REPARSE : ROFF_APPEND; + ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND; } /* * Calling a high-level macro that was renamed with .rn. * r->current_string has already been set up by roff_parse(). */ -static enum rofferr +static int roff_renamed(ROFF_ARGS) { char *nbuf; @@ -3458,6 +3692,7 @@ roff_renamed(ROFF_ARGS) buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1; free(buf->buf); buf->buf = nbuf; + *offs = 0; return ROFF_CONT; }