=================================================================== RCS file: /cvs/mandoc/roff.c,v retrieving revision 1.318 retrieving revision 1.339 diff -u -p -r1.318 -r1.339 --- mandoc/roff.c 2017/07/04 22:52:00 1.318 +++ mandoc/roff.c 2018/08/23 14:29:39 1.339 @@ -1,7 +1,7 @@ -/* $Id: roff.c,v 1.318 2017/07/04 22:52:00 schwarze Exp $ */ +/* $Id: roff.c,v 1.339 2018/08/23 14:29:39 schwarze Exp $ */ /* * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons - * Copyright (c) 2010-2015, 2017 Ingo Schwarze + * Copyright (c) 2010-2015, 2017, 2018 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -46,6 +46,7 @@ #define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */ #define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \ ROFFDEF_REN | ROFFDEF_STD) +#define ROFFDEF_UNDEF (1 << 5) /* Completely undefined. */ /* --- data types --------------------------------------------------------- */ @@ -72,6 +73,7 @@ struct roffkv { struct roffreg { struct roffstr key; int val; + int step; struct roffreg *next; }; @@ -83,10 +85,21 @@ struct roffreq { char name[]; }; +/* + * A macro processing context. + * More than one is needed when macro calls are nested. + */ +struct mctx { + char **argv; + int argc; + int argsz; +}; + struct roff { struct mparse *parse; /* parse point */ struct roff_man *man; /* mdoc or man parser */ struct roffnode *last; /* leaf of stack */ + struct mctx *mstack; /* stack of macro contexts */ int *rstack; /* stack of inverted `ie' values */ struct ohash *reqtab; /* request lookup table */ struct roffreg *regtab; /* number registers */ @@ -98,15 +111,15 @@ struct roff { struct tbl_node *first_tbl; /* first table parsed */ struct tbl_node *last_tbl; /* last table parsed */ struct tbl_node *tbl; /* current table being parsed */ - struct eqn_node *last_eqn; /* last equation parsed */ - struct eqn_node *first_eqn; /* first equation parsed */ - struct eqn_node *eqn; /* current equation being parsed */ + struct eqn_node *last_eqn; /* equation parser */ + struct eqn_node *eqn; /* active equation parser */ int eqn_inline; /* current equation is inline */ int options; /* parse options */ + int mstacksz; /* current size of mstack */ + int mstackpos; /* position in mstack */ int rstacksz; /* current size limit of rstack */ int rstackpos; /* position in rstack */ int format; /* current file in mdoc or man format */ - int argc; /* number of args of the last macro */ char control; /* control character */ char escape; /* escape character */ }; @@ -154,6 +167,7 @@ static void roffnode_cleanscope(struct roff *); static void roffnode_pop(struct roff *); static void roffnode_push(struct roff *, enum roff_tok, const char *, int, int); +static void roff_addtbl(struct roff_man *, struct tbl_node *); static enum rofferr roff_als(ROFF_ARGS); static enum rofferr roff_block(ROFF_ARGS); static enum rofferr roff_block_text(ROFF_ARGS); @@ -181,11 +195,11 @@ static void roff_freestr(struct roffkv *); static size_t roff_getname(struct roff *, char **, int, int); static int roff_getnum(const char *, int *, int *, int); static int roff_getop(const char *, int *, char *); -static int roff_getregn(const struct roff *, - const char *, size_t); +static int roff_getregn(struct roff *, + const char *, size_t, char); static int roff_getregro(const struct roff *, const char *name); -static const char *roff_getstrn(const struct roff *, +static const char *roff_getstrn(struct roff *, const char *, size_t, int *); static int roff_hasregn(const struct roff *, const char *, size_t); @@ -195,6 +209,7 @@ static enum rofferr roff_line_ignore(ROFF_ARGS); static void roff_man_alloc1(struct roff_man *); static void roff_man_free1(struct roff_man *); static enum rofferr roff_manyarg(ROFF_ARGS); +static enum rofferr roff_nop(ROFF_ARGS); static enum rofferr roff_nr(ROFF_ARGS); static enum rofferr roff_onearg(ROFF_ARGS); static enum roff_tok roff_parse(struct roff *, char *, int *, @@ -203,13 +218,17 @@ static enum rofferr roff_parsetext(struct roff *, str int, int *); static enum rofferr roff_renamed(ROFF_ARGS); static enum rofferr roff_res(struct roff *, struct buf *, int, int); +static enum rofferr roff_return(ROFF_ARGS); static enum rofferr roff_rm(ROFF_ARGS); static enum rofferr roff_rn(ROFF_ARGS); static enum rofferr roff_rr(ROFF_ARGS); +static void roff_setregn(struct roff *, const char *, + size_t, int, char, int); static void roff_setstr(struct roff *, const char *, const char *, int); static void roff_setstrn(struct roffkv **, const char *, size_t, const char *, size_t, int); +static enum rofferr roff_shift(ROFF_ARGS); static enum rofferr roff_so(ROFF_ARGS); static enum rofferr roff_tr(ROFF_ARGS); static enum rofferr roff_Dd(ROFF_ARGS); @@ -322,6 +341,7 @@ const char *__roff_name[MAN_MAX + 1] = { "Dx", "%Q", "%U", "Ta", NULL, "TH", "SH", "SS", "TP", + "TQ", "LP", "PP", "P", "IP", "HP", "SM", "SB", "BI", "IB", "BR", "RB", "R", @@ -329,7 +349,8 @@ const char *__roff_name[MAN_MAX + 1] = { "nf", "fi", "RE", "RS", "DT", "UC", "PD", "AT", "in", - "OP", "EX", "EE", "UR", + "SY", "YS", "OP", + "EX", "EE", "UR", "UE", "MT", "ME", NULL }; const char *const *roff_name = __roff_name; @@ -486,7 +507,7 @@ static struct roffmac roffs[TOKEN_NONE] = { { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */ { roff_unsupp, NULL, NULL, 0 }, /* nm */ { roff_unsupp, NULL, NULL, 0 }, /* nn */ - { roff_unsupp, NULL, NULL, 0 }, /* nop */ + { roff_nop, NULL, NULL, 0 }, /* nop */ { roff_nr, NULL, NULL, 0 }, /* nr */ { roff_unsupp, NULL, NULL, 0 }, /* nrf */ { roff_line_ignore, NULL, NULL, 0 }, /* nroff */ @@ -515,7 +536,7 @@ static struct roffmac roffs[TOKEN_NONE] = { { roff_unsupp, NULL, NULL, 0 }, /* rchar */ { roff_line_ignore, NULL, NULL, 0 }, /* rd */ { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */ - { roff_unsupp, NULL, NULL, 0 }, /* return */ + { roff_return, NULL, NULL, 0 }, /* return */ { roff_unsupp, NULL, NULL, 0 }, /* rfschar */ { roff_line_ignore, NULL, NULL, 0 }, /* rhang */ { roff_rm, NULL, NULL, 0 }, /* rm */ @@ -527,7 +548,7 @@ static struct roffmac roffs[TOKEN_NONE] = { { roff_unsupp, NULL, NULL, 0 }, /* schar */ { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */ { roff_line_ignore, NULL, NULL, 0 }, /* shc */ - { roff_unsupp, NULL, NULL, 0 }, /* shift */ + { roff_shift, NULL, NULL, 0 }, /* shift */ { roff_line_ignore, NULL, NULL, 0 }, /* sizes */ { roff_so, NULL, NULL, 0 }, /* so */ { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */ @@ -695,7 +716,6 @@ static void roff_free1(struct roff *r) { struct tbl_node *tbl; - struct eqn_node *e; int i; while (NULL != (tbl = r->first_tbl)) { @@ -704,12 +724,13 @@ roff_free1(struct roff *r) } r->first_tbl = r->last_tbl = r->tbl = NULL; - while (NULL != (e = r->first_eqn)) { - r->first_eqn = e->next; - eqn_free(e); - } - r->first_eqn = r->last_eqn = r->eqn = NULL; + if (r->last_eqn != NULL) + eqn_free(r->last_eqn); + r->last_eqn = r->eqn = NULL; + while (r->mstackpos >= 0) + roff_userret(r); + while (r->last) roffnode_pop(r); @@ -749,7 +770,12 @@ roff_reset(struct roff *r) void roff_free(struct roff *r) { + int i; + roff_free1(r); + for (i = 0; i < r->mstacksz; i++) + free(r->mstack[i].argv); + free(r->mstack); roffhash_free(r->reqtab); free(r); } @@ -761,9 +787,10 @@ roff_alloc(struct mparse *parse, int options) r = mandoc_calloc(1, sizeof(struct roff)); r->parse = parse; - r->reqtab = roffhash_alloc(0, ROFF_USERDEF); + r->reqtab = roffhash_alloc(0, ROFF_RENAMED); r->options = options; r->format = options & (MPARSE_MDOC | MPARSE_MAN); + r->mstackpos = -1; r->rstackpos = -1; r->escape = '\\'; return r; @@ -983,31 +1010,21 @@ roff_body_alloc(struct roff_man *man, int line, int po return n; } -void -roff_addeqn(struct roff_man *man, const struct eqn *eqn) +static void +roff_addtbl(struct roff_man *man, struct tbl_node *tbl) { struct roff_node *n; + const struct tbl_span *span; - n = roff_node_alloc(man, eqn->ln, eqn->pos, ROFFT_EQN, TOKEN_NONE); - n->eqn = eqn; - if (eqn->ln > man->last->line) - n->flags |= NODE_LINE; - roff_node_append(man, n); - man->next = ROFF_NEXT_SIBLING; -} - -void -roff_addtbl(struct roff_man *man, const struct tbl_span *tbl) -{ - struct roff_node *n; - if (man->macroset == MACROSET_MAN) man_breakscope(man, ROFF_TS); - n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE); - n->span = tbl; - roff_node_append(man, n); - n->flags |= NODE_VALID | NODE_ENDED; - man->next = ROFF_NEXT_SIBLING; + while ((span = tbl_span(tbl)) != NULL) { + n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE); + n->span = span; + roff_node_append(man, n); + n->flags |= NODE_VALID | NODE_ENDED; + man->next = ROFF_NEXT_SIBLING; + } } void @@ -1055,6 +1072,8 @@ roff_node_free(struct roff_node *n) mdoc_argv_free(n->args); if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM) free(n->norm); + if (n->eqn != NULL) + eqn_box_free(n->eqn); free(n->string); free(n); } @@ -1128,23 +1147,30 @@ deroff(char **dest, const struct roff_node *n) static enum rofferr roff_res(struct roff *r, struct buf *buf, int ln, int pos) { + struct mctx *ctx; /* current macro call context */ char ubuf[24]; /* buffer to print the number */ + struct roff_node *n; /* used for header comments */ const char *start; /* start of the string to process */ char *stesc; /* start of an escape sequence ('\\') */ + char *ep; /* end of comment string */ const char *stnam; /* start of the name, after "[(*" */ const char *cp; /* end of the name, e.g. before ']' */ const char *res; /* the string to be substituted */ char *nbuf; /* new buffer to copy buf->buf to */ size_t maxl; /* expected length of the escape name */ size_t naml; /* actual length of the escape name */ + size_t asz; /* length of the replacement */ + size_t rsz; /* length of the rest of the string */ enum mandoc_esc esc; /* type of the escape sequence */ - enum mandoc_os os_e; /* kind of RCS id seen */ int inaml; /* length returned from mandoc_escape() */ int expand_count; /* to avoid infinite loops */ int npos; /* position in numeric expression */ int arg_complete; /* argument not interrupted by eol */ + int quote_args; /* true for \\$@, false for \\$* */ int done; /* no more input available */ int deftype; /* type of definition to paste */ + int rcsid; /* kind of RCS id seen */ + char sign; /* increment number register */ char term; /* character terminating the escape */ /* Search forward for comments. */ @@ -1160,33 +1186,63 @@ roff_res(struct roff *r, struct buf *buf, int ln, int /* Comment found, look for RCS id. */ + rcsid = 0; if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) { - os_e = MANDOC_OS_OPENBSD; + rcsid = 1 << MANDOC_OS_OPENBSD; cp += 8; } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) { - os_e = MANDOC_OS_NETBSD; + rcsid = 1 << MANDOC_OS_NETBSD; cp += 7; } if (cp != NULL && isalnum((unsigned char)*cp) == 0 && strchr(cp, '$') != NULL) { - if (r->man->meta.rcsids & (1 << os_e)) + if (r->man->meta.rcsids & rcsid) mandoc_msg(MANDOCERR_RCS_REP, r->parse, ln, stesc + 1 - buf->buf, stesc + 1); - r->man->meta.rcsids |= 1 << os_e; + r->man->meta.rcsids |= rcsid; } /* Handle trailing whitespace. */ - cp = strchr(stesc--, '\0') - 1; - if (*cp == '\n') { + ep = strchr(stesc--, '\0') - 1; + if (*ep == '\n') { done = 1; - cp--; + ep--; } - if (*cp == ' ' || *cp == '\t') + if (*ep == ' ' || *ep == '\t') mandoc_msg(MANDOCERR_SPACE_EOL, r->parse, - ln, cp - buf->buf, NULL); - while (stesc > start && stesc[-1] == ' ') + ln, ep - buf->buf, NULL); + + /* + * Save comments preceding the title macro + * in the syntax tree. + */ + + if (r->format == 0) { + while (*ep == ' ' || *ep == '\t') + ep--; + ep[1] = '\0'; + n = roff_node_alloc(r->man, + ln, stesc + 1 - buf->buf, + ROFFT_COMMENT, TOKEN_NONE); + n->string = mandoc_strdup(stesc + 2); + roff_node_append(r->man, n); + n->flags |= NODE_VALID | NODE_ENDED; + r->man->next = ROFF_NEXT_SIBLING; + } + + /* Line continuation with comment. */ + + if (stesc[1] == '#') { + *stesc = '\0'; + return ROFF_APPEND; + } + + /* Discard normal comments. */ + + while (stesc > start && stesc[-1] == ' ' && + (stesc == start + 1 || stesc[-2] != '\\')) stesc--; *stesc = '\0'; break; @@ -1247,6 +1303,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int cp = stesc + 1; switch (*cp) { case '*': + case '$': res = NULL; break; case 'B': @@ -1254,6 +1311,9 @@ roff_res(struct roff *r, struct buf *buf, int ln, int term = cp[1]; /* FALLTHROUGH */ case 'n': + sign = cp[1]; + if (sign == '+' || sign == '-') + cp++; res = ubuf; break; default: @@ -1345,8 +1405,77 @@ roff_res(struct roff *r, struct buf *buf, int ln, int if (arg_complete) { deftype = ROFFDEF_USER | ROFFDEF_PRE; res = roff_getstrn(r, stnam, naml, &deftype); + + /* + * If not overriden, let \*(.T + * through to the formatters. + */ + + if (res == NULL && naml == 2 && + stnam[0] == '.' && stnam[1] == 'T') { + roff_setstrn(&r->strtab, + ".T", 2, NULL, 0, 0); + stesc--; + continue; + } } break; + case '$': + if (r->mstackpos < 0) { + mandoc_vmsg(MANDOCERR_ARG_UNDEF, + r->parse, ln, (int)(stesc - buf->buf), + "%.3s", stesc); + break; + } + ctx = r->mstack + r->mstackpos; + npos = stesc[2] - '1'; + if (npos >= 0 && npos <= 8) { + res = npos < ctx->argc ? + ctx->argv[npos] : ""; + break; + } + if (stesc[2] == '*') + quote_args = 0; + else if (stesc[2] == '@') + quote_args = 1; + else { + mandoc_vmsg(MANDOCERR_ARG_NONUM, + r->parse, ln, (int)(stesc - buf->buf), + "%.3s", stesc); + break; + } + asz = 0; + for (npos = 0; npos < ctx->argc; npos++) { + if (npos) + asz++; /* blank */ + if (quote_args) + asz += 2; /* quotes */ + asz += strlen(ctx->argv[npos]); + } + if (asz != 3) { + rsz = buf->sz - (stesc - buf->buf) - 3; + if (asz < 3) + memmove(stesc + asz, stesc + 3, rsz); + buf->sz += asz - 3; + nbuf = mandoc_realloc(buf->buf, buf->sz); + start = nbuf + pos; + stesc = nbuf + (stesc - buf->buf); + buf->buf = nbuf; + if (asz > 3) + memmove(stesc + asz, stesc + 3, rsz); + } + for (npos = 0; npos < ctx->argc; npos++) { + if (npos) + *stesc++ = ' '; + if (quote_args) + *stesc++ = '"'; + cp = ctx->argv[npos]; + while (*cp != '\0') + *stesc++ = *cp++; + if (quote_args) + *stesc++ = '"'; + } + continue; case 'B': npos = 0; ubuf[0] = arg_complete && @@ -1358,7 +1487,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int case 'n': if (arg_complete) (void)snprintf(ubuf, sizeof(ubuf), "%d", - roff_getregn(r, stnam, naml)); + roff_getregn(r, stnam, naml, sign)); else ubuf[0] = '\0'; break; @@ -1370,9 +1499,10 @@ roff_res(struct roff *r, struct buf *buf, int ln, int } if (res == NULL) { - mandoc_vmsg(MANDOCERR_STR_UNDEF, - r->parse, ln, (int)(stesc - buf->buf), - "%.*s", (int)naml, stnam); + if (stesc[1] == '*') + mandoc_vmsg(MANDOCERR_STR_UNDEF, + r->parse, ln, (int)(stesc - buf->buf), + "%.*s", (int)naml, stnam); res = ""; } else if (buf->sz + strlen(res) > SHRT_MAX) { mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, @@ -1512,10 +1642,15 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, return e; assert(e == ROFF_CONT); } - if (r->eqn != NULL) - return eqn_read(&r->eqn, ln, buf->buf, ppos, offs); - if (r->tbl != NULL && ( ! ctl || buf->buf[pos] == '\0')) - return tbl_read(r->tbl, ln, buf->buf, ppos); + if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) { + eqn_read(r->eqn, buf->buf + ppos); + return ROFF_IGN; + } + if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) { + tbl_read(r->tbl, ln, buf->buf, ppos); + roff_addtbl(r->man, r->tbl); + return ROFF_IGN; + } if ( ! ctl) return roff_parsetext(r, buf, pos, offs); @@ -1556,13 +1691,16 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, pos++; while (buf->buf[pos] == ' ') pos++; - return tbl_read(r->tbl, ln, buf->buf, pos); + tbl_read(r->tbl, ln, buf->buf, pos); + roff_addtbl(r->man, r->tbl); + return ROFF_IGN; } /* For now, let high level macros abort .ce mode. */ if (ctl && roffce_node != NULL && - (t == TOKEN_NONE || t == ROFF_EQ || t == ROFF_TS)) { + (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ || + t == ROFF_TH || t == ROFF_TS)) { r->man->last = roffce_node; r->man->next = ROFF_NEXT_SIBLING; roffce_lines = 0; @@ -1582,25 +1720,45 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs); } +/* + * Internal interface function to tell the roff parser that execution + * of the current macro ended. This is required because macro + * definitions usually do not end with a .return request. + */ void -roff_endparse(struct roff *r) +roff_userret(struct roff *r) { + struct mctx *ctx; + int i; - if (r->last) + assert(r->mstackpos >= 0); + ctx = r->mstack + r->mstackpos; + for (i = 0; i < ctx->argc; i++) + free(ctx->argv[i]); + ctx->argc = 0; + r->mstackpos--; +} + +void +roff_endparse(struct roff *r) +{ + if (r->last != NULL) mandoc_msg(MANDOCERR_BLK_NOEND, r->parse, r->last->line, r->last->col, roff_name[r->last->tok]); - if (r->eqn) { + if (r->eqn != NULL) { mandoc_msg(MANDOCERR_BLK_NOEND, r->parse, - r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ"); - eqn_end(&r->eqn); + r->eqn->node->line, r->eqn->node->pos, "EQ"); + eqn_parse(r->eqn); + r->eqn = NULL; } - if (r->tbl) { + if (r->tbl != NULL) { mandoc_msg(MANDOCERR_BLK_NOEND, r->parse, r->tbl->line, r->tbl->pos, "TS"); - tbl_end(&r->tbl); + tbl_end(r->tbl); + r->tbl = NULL; } } @@ -1640,6 +1798,11 @@ roff_parse(struct roff *r, char *buf, int *pos, int ln } if (t != TOKEN_NONE) *pos = cp - buf; + else if (deftype == ROFFDEF_UNDEF) { + /* Using an undefined macro defines it to be empty. */ + roff_setstrn(&r->strtab, mac, maclen, "", 0, 0); + roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0); + } return t; } @@ -2109,9 +2272,10 @@ out: static int roff_evalcond(struct roff *r, int ln, char *v, int *pos) { - char *cp, *name; - size_t sz; - int deftype, number, savepos, istrue, wanttrue; + const char *start, *end; + char *cp, *name; + size_t sz; + int deftype, len, number, savepos, istrue, wanttrue; if ('!' == v[*pos]) { wanttrue = 0; @@ -2126,12 +2290,50 @@ roff_evalcond(struct roff *r, int ln, char *v, int *po case 'o': (*pos)++; return wanttrue; - case 'c': case 'e': case 't': case 'v': (*pos)++; return !wanttrue; + case 'c': + do { + (*pos)++; + } while (v[*pos] == ' '); + + /* + * Quirk for groff compatibility: + * The horizontal tab is neither available nor unavailable. + */ + + if (v[*pos] == '\t') { + (*pos)++; + return 0; + } + + /* Printable ASCII characters are available. */ + + if (v[*pos] != '\\') { + (*pos)++; + return wanttrue; + } + + end = v + ++*pos; + switch (mandoc_escape(&end, &start, &len)) { + case ESCAPE_SPECIAL: + istrue = mchars_spec2cp(start, len) != -1; + break; + case ESCAPE_UNICODE: + istrue = 1; + break; + case ESCAPE_NUMBERED: + istrue = mchars_num2char(start, len) != -1; + break; + default: + istrue = !wanttrue; + break; + } + *pos = end - v; + return istrue == wanttrue; case 'd': case 'r': cp = v + *pos + 1; @@ -2516,20 +2718,29 @@ roff_evalnum(struct roff *r, int ln, const char *v, void roff_setreg(struct roff *r, const char *name, int val, char sign) { + roff_setregn(r, name, strlen(name), val, sign, INT_MIN); +} + +static void +roff_setregn(struct roff *r, const char *name, size_t len, + int val, char sign, int step) +{ struct roffreg *reg; /* Search for an existing register with the same name. */ reg = r->regtab; - while (reg && strcmp(name, reg->key.p)) + while (reg != NULL && (reg->key.sz != len || + strncmp(reg->key.p, name, len) != 0)) reg = reg->next; if (NULL == reg) { /* Create a new register. */ reg = mandoc_malloc(sizeof(struct roffreg)); - reg->key.p = mandoc_strdup(name); - reg->key.sz = strlen(name); + reg->key.p = mandoc_strndup(name, len); + reg->key.sz = len; reg->val = 0; + reg->step = 0; reg->next = r->regtab; r->regtab = reg; } @@ -2540,6 +2751,8 @@ roff_setreg(struct roff *r, const char *name, int val, reg->val -= val; else reg->val = val; + if (step != INT_MIN) + reg->step = step; } /* @@ -2554,7 +2767,7 @@ roff_getregro(const struct roff *r, const char *name) switch (*name) { case '$': /* Number of arguments of the last macro evaluated. */ - return r->argc; + return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc; case 'A': /* ASCII approximation mode is always off. */ return 0; case 'g': /* Groff compatibility mode is always on. */ @@ -2573,26 +2786,13 @@ roff_getregro(const struct roff *r, const char *name) } int -roff_getreg(const struct roff *r, const char *name) +roff_getreg(struct roff *r, const char *name) { - struct roffreg *reg; - int val; - - if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) { - val = roff_getregro(r, name + 1); - if (-1 != val) - return val; - } - - for (reg = r->regtab; reg; reg = reg->next) - if (0 == strcmp(name, reg->key.p)) - return reg->val; - - return 0; + return roff_getregn(r, name, strlen(name), '\0'); } static int -roff_getregn(const struct roff *r, const char *name, size_t len) +roff_getregn(struct roff *r, const char *name, size_t len, char sign) { struct roffreg *reg; int val; @@ -2603,11 +2803,24 @@ roff_getregn(const struct roff *r, const char *name, s return val; } - for (reg = r->regtab; reg; reg = reg->next) + for (reg = r->regtab; reg; reg = reg->next) { if (len == reg->key.sz && - 0 == strncmp(name, reg->key.p, len)) + 0 == strncmp(name, reg->key.p, len)) { + switch (sign) { + case '+': + reg->val += reg->step; + break; + case '-': + reg->val -= reg->step; + break; + default: + break; + } return reg->val; + } + } + roff_setregn(r, name, len, 0, '\0', INT_MIN); return 0; } @@ -2647,9 +2860,9 @@ roff_freereg(struct roffreg *reg) static enum rofferr roff_nr(ROFF_ARGS) { - char *key, *val; + char *key, *val, *step; size_t keysz; - int iv; + int iv, is, len; char sign; key = val = buf->buf + pos; @@ -2659,15 +2872,22 @@ roff_nr(ROFF_ARGS) keysz = roff_getname(r, &val, ln, pos); if (key[keysz] == '\\') return ROFF_IGN; - key[keysz] = '\0'; sign = *val; if (sign == '+' || sign == '-') val++; - if (roff_evalnum(r, ln, val, NULL, &iv, ROFFNUM_SCALE)) - roff_setreg(r, key, iv, sign); + len = 0; + if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0) + return ROFF_IGN; + step = val + len; + while (isspace((unsigned char)*step)) + step++; + if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0) + is = INT_MIN; + + roff_setregn(r, key, keysz, iv, sign, is); return ROFF_IGN; } @@ -2782,16 +3002,20 @@ roff_Dd(ROFF_ARGS) static enum rofferr roff_TE(ROFF_ARGS) { - - if (NULL == r->tbl) + if (r->tbl == NULL) { mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "TE"); - else if ( ! tbl_end(&r->tbl)) { + return ROFF_IGN; + } + if (tbl_end(r->tbl) == 0) { + r->tbl = NULL; free(buf->buf); buf->buf = mandoc_strdup(".sp"); buf->sz = 4; + *offs = 0; return ROFF_REPARSE; } + r->tbl = NULL; return ROFF_IGN; } @@ -2877,21 +3101,26 @@ roff_eqndelim(struct roff *r, struct buf *buf, int pos static enum rofferr roff_EQ(ROFF_ARGS) { - struct eqn_node *e; + struct roff_node *n; + if (r->man->macroset == MACROSET_MAN) + man_breakscope(r->man, ROFF_EQ); + n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE); + if (ln > r->man->last->line) + n->flags |= NODE_LINE; + n->eqn = mandoc_calloc(1, sizeof(*n->eqn)); + n->eqn->expectargs = UINT_MAX; + roff_node_append(r->man, n); + r->man->next = ROFF_NEXT_SIBLING; + assert(r->eqn == NULL); - e = eqn_alloc(ppos, ln, r->parse); + if (r->last_eqn == NULL) + r->last_eqn = eqn_alloc(r->parse); + else + eqn_reset(r->last_eqn); + r->eqn = r->last_eqn; + r->eqn->node = n; - if (r->last_eqn) { - r->last_eqn->next = e; - e->delim = r->last_eqn->delim; - e->odelim = r->last_eqn->odelim; - e->cdelim = r->last_eqn->cdelim; - } else - r->first_eqn = r->last_eqn = e; - - r->eqn = r->last_eqn = e; - if (buf->buf[pos] != '\0') mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos, ".EQ %s", buf->buf + pos); @@ -2902,30 +3131,31 @@ roff_EQ(ROFF_ARGS) static enum rofferr roff_EN(ROFF_ARGS) { - - mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN"); + if (r->eqn != NULL) { + eqn_parse(r->eqn); + r->eqn = NULL; + } else + mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN"); + if (buf->buf[pos] != '\0') + mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos, + "EN %s", buf->buf + pos); return ROFF_IGN; } static enum rofferr roff_TS(ROFF_ARGS) { - struct tbl_node *tbl; - - if (r->tbl) { + if (r->tbl != NULL) { mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse, ln, ppos, "TS breaks TS"); - tbl_end(&r->tbl); + tbl_end(r->tbl); } - - tbl = tbl_alloc(ppos, ln, r->parse); - + r->tbl = tbl_alloc(ppos, ln, r->parse); if (r->last_tbl) - r->last_tbl->next = tbl; + r->last_tbl->next = r->tbl; else - r->first_tbl = r->last_tbl = tbl; - - r->tbl = r->last_tbl = tbl; + r->first_tbl = r->tbl; + r->last_tbl = r->tbl; return ROFF_IGN; } @@ -2937,7 +3167,8 @@ roff_onearg(ROFF_ARGS) int npos; if (r->man->flags & (MAN_BLINE | MAN_ELINE) && - (tok == ROFF_sp || tok == ROFF_ti)) + (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp || + tok == ROFF_ti)) man_breakscope(r->man, tok); if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) { @@ -3030,7 +3261,7 @@ roff_als(ROFF_ARGS) if (oldsz == 0) return ROFF_IGN; - valsz = mandoc_asprintf(&value, ".%.*s \\$*\\\"\n", + valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n", (int)oldsz, oldn); roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0); roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); @@ -3097,6 +3328,15 @@ roff_eo(ROFF_ARGS) } static enum rofferr +roff_nop(ROFF_ARGS) +{ + while (buf->buf[pos] == ' ') + pos++; + *offs = pos; + return ROFF_RERUN; +} + +static enum rofferr roff_tr(ROFF_ARGS) { const char *p, *first, *second; @@ -3158,7 +3398,23 @@ roff_tr(ROFF_ARGS) return ROFF_IGN; } +/* + * Implementation of the .return request. + * There is no need to call roff_userret() from here. + * The read module will call that after rewinding the reader stack + * to the place from where the current macro was called. + */ static enum rofferr +roff_return(ROFF_ARGS) +{ + if (r->mstackpos >= 0) + return ROFF_USERRET; + + mandoc_msg(MANDOCERR_REQ_NOMAC, r->parse, ln, ppos, "return"); + return ROFF_IGN; +} + +static enum rofferr roff_rn(ROFF_ARGS) { const char *value; @@ -3209,6 +3465,39 @@ roff_rn(ROFF_ARGS) } static enum rofferr +roff_shift(ROFF_ARGS) +{ + struct mctx *ctx; + int levels, i; + + levels = 1; + if (buf->buf[pos] != '\0' && + roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) { + mandoc_vmsg(MANDOCERR_CE_NONUM, r->parse, + ln, pos, "shift %s", buf->buf + pos); + levels = 1; + } + if (r->mstackpos < 0) { + mandoc_msg(MANDOCERR_REQ_NOMAC, r->parse, ln, ppos, "shift"); + return ROFF_IGN; + } + ctx = r->mstack + r->mstackpos; + if (levels > ctx->argc) { + mandoc_vmsg(MANDOCERR_SHIFT, r->parse, + ln, pos, "%d, but max is %d", levels, ctx->argc); + levels = ctx->argc; + } + if (levels == 0) + return ROFF_IGN; + for (i = 0; i < levels; i++) + free(ctx->argv[i]); + ctx->argc -= levels; + for (i = 0; i < ctx->argc; i++) + ctx->argv[i] = ctx->argv[i + levels]; + return ROFF_IGN; +} + +static enum rofferr roff_so(ROFF_ARGS) { char *name, *cp; @@ -3243,143 +3532,58 @@ roff_so(ROFF_ARGS) static enum rofferr roff_userdef(ROFF_ARGS) { - const char *arg[16], *ap; - char *cp, *n1, *n2; - int expand_count, i, ib, ie; - size_t asz, rsz; + struct mctx *ctx; + char *arg, *ap, *dst, *src; + size_t sz; - /* - * Collect pointers to macro argument strings - * and NUL-terminate them. - */ + /* Initialize a new macro stack context. */ - r->argc = 0; - cp = buf->buf + pos; - for (i = 0; i < 16; i++) { - if (*cp == '\0') - arg[i] = ""; - else { - arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos); - r->argc = i + 1; - } + if (++r->mstackpos == r->mstacksz) { + r->mstack = mandoc_recallocarray(r->mstack, + r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack)); + r->mstacksz += 8; } + ctx = r->mstack + r->mstackpos; + ctx->argsz = 0; + ctx->argc = 0; + ctx->argv = NULL; /* - * Expand macro arguments. + * Collect pointers to macro argument strings, + * NUL-terminating them and escaping quotes. */ - buf->sz = strlen(r->current_string) + 1; - n1 = n2 = cp = mandoc_malloc(buf->sz); - memcpy(n1, r->current_string, buf->sz); - expand_count = 0; - while (*cp != '\0') { - - /* Scan ahead for the next argument invocation. */ - - if (*cp++ != '\\') - continue; - if (*cp++ != '$') - continue; - if (*cp == '*') { /* \\$* inserts all arguments */ - ib = 0; - ie = r->argc - 1; - } else { /* \\$1 .. \\$9 insert one argument */ - ib = ie = *cp - '1'; - if (ib < 0 || ib > 8) - continue; + src = buf->buf + pos; + while (*src != '\0') { + if (ctx->argc == ctx->argsz) { + ctx->argsz += 8; + ctx->argv = mandoc_reallocarray(ctx->argv, + ctx->argsz, sizeof(*ctx->argv)); } - cp -= 2; - - /* - * Prevent infinite recursion. - */ - - if (cp >= n2) - expand_count = 1; - else if (++expand_count > EXPAND_LIMIT) { - mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, - ln, (int)(cp - n1), NULL); - free(buf->buf); - buf->buf = n1; - return ROFF_IGN; + arg = mandoc_getarg(r->parse, &src, ln, &pos); + sz = 1; /* For the terminating NUL. */ + for (ap = arg; *ap != '\0'; ap++) + sz += *ap == '"' ? 4 : 1; + ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz); + for (ap = arg; *ap != '\0'; ap++) { + if (*ap == '"') { + memcpy(dst, "\\(dq", 4); + dst += 4; + } else + *dst++ = *ap; } - - /* - * Determine the size of the expanded argument, - * taking escaping of quotes into account. - */ - - asz = ie > ib ? ie - ib : 0; /* for blanks */ - for (i = ib; i <= ie; i++) { - for (ap = arg[i]; *ap != '\0'; ap++) { - asz++; - if (*ap == '"') - asz += 3; - } - } - if (asz != 3) { - - /* - * Determine the size of the rest of the - * unexpanded macro, including the NUL. - */ - - rsz = buf->sz - (cp - n1) - 3; - - /* - * When shrinking, move before - * releasing the storage. - */ - - if (asz < 3) - memmove(cp + asz, cp + 3, rsz); - - /* - * Resize the storage for the macro - * and readjust the parse pointer. - */ - - buf->sz += asz - 3; - n2 = mandoc_realloc(n1, buf->sz); - cp = n2 + (cp - n1); - n1 = n2; - - /* - * When growing, make room - * for the expanded argument. - */ - - if (asz > 3) - memmove(cp + asz, cp + 3, rsz); - } - - /* Copy the expanded argument, escaping quotes. */ - - n2 = cp; - for (i = ib; i <= ie; i++) { - for (ap = arg[i]; *ap != '\0'; ap++) { - if (*ap == '"') { - memcpy(n2, "\\(dq", 4); - n2 += 4; - } else - *n2++ = *ap; - } - if (i < ie) - *n2++ = ' '; - } + *dst = '\0'; } - /* - * Replace the macro invocation - * by the expanded macro. - */ + /* Replace the macro invocation by the macro definition. */ free(buf->buf); - buf->buf = n1; + buf->buf = mandoc_strdup(r->current_string); + buf->sz = strlen(buf->buf) + 1; *offs = 0; return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ? - ROFF_REPARSE : ROFF_APPEND; + ROFF_USERCALL : ROFF_APPEND; } /* @@ -3395,6 +3599,7 @@ roff_renamed(ROFF_ARGS) buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1; free(buf->buf); buf->buf = nbuf; + *offs = 0; return ROFF_CONT; } @@ -3528,62 +3733,95 @@ roff_setstrn(struct roffkv **r, const char *name, size } static const char * -roff_getstrn(const struct roff *r, const char *name, size_t len, +roff_getstrn(struct roff *r, const char *name, size_t len, int *deftype) { const struct roffkv *n; - int i; + int found, i; enum roff_tok tok; - if (*deftype & ROFFDEF_USER) { - for (n = r->strtab; n != NULL; n = n->next) { - if (strncmp(name, n->key.p, len) == 0 && - n->key.p[len] == '\0' && - n->val.p != NULL) { - *deftype = ROFFDEF_USER; - return n->val.p; - } + found = 0; + for (n = r->strtab; n != NULL; n = n->next) { + if (strncmp(name, n->key.p, len) != 0 || + n->key.p[len] != '\0' || n->val.p == NULL) + continue; + if (*deftype & ROFFDEF_USER) { + *deftype = ROFFDEF_USER; + return n->val.p; + } else { + found = 1; + break; } } - if (*deftype & ROFFDEF_PRE) { - for (i = 0; i < PREDEFS_MAX; i++) { - if (strncmp(name, predefs[i].name, len) == 0 && - predefs[i].name[len] == '\0') { - *deftype = ROFFDEF_PRE; - return predefs[i].str; - } + for (n = r->rentab; n != NULL; n = n->next) { + if (strncmp(name, n->key.p, len) != 0 || + n->key.p[len] != '\0' || n->val.p == NULL) + continue; + if (*deftype & ROFFDEF_REN) { + *deftype = ROFFDEF_REN; + return n->val.p; + } else { + found = 1; + break; } } - if (*deftype & ROFFDEF_REN) { - for (n = r->rentab; n != NULL; n = n->next) { - if (strncmp(name, n->key.p, len) == 0 && - n->key.p[len] == '\0' && - n->val.p != NULL) { - *deftype = ROFFDEF_REN; - return n->val.p; - } + for (i = 0; i < PREDEFS_MAX; i++) { + if (strncmp(name, predefs[i].name, len) != 0 || + predefs[i].name[len] != '\0') + continue; + if (*deftype & ROFFDEF_PRE) { + *deftype = ROFFDEF_PRE; + return predefs[i].str; + } else { + found = 1; + break; } } - if (*deftype & ROFFDEF_STD) { - if (r->man->macroset != MACROSET_MAN) { - for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) { - if (strncmp(name, roff_name[tok], len) == 0 && - roff_name[tok][len] == '\0') { - *deftype = ROFFDEF_STD; - return NULL; - } + if (r->man->macroset != MACROSET_MAN) { + for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) { + if (strncmp(name, roff_name[tok], len) != 0 || + roff_name[tok][len] != '\0') + continue; + if (*deftype & ROFFDEF_STD) { + *deftype = ROFFDEF_STD; + return NULL; + } else { + found = 1; + break; } } - if (r->man->macroset != MACROSET_MDOC) { - for (tok = MAN_TH; tok < MAN_MAX; tok++) { - if (strncmp(name, roff_name[tok], len) == 0 && - roff_name[tok][len] == '\0') { - *deftype = ROFFDEF_STD; - return NULL; - } + } + if (r->man->macroset != MACROSET_MDOC) { + for (tok = MAN_TH; tok < MAN_MAX; tok++) { + if (strncmp(name, roff_name[tok], len) != 0 || + roff_name[tok][len] != '\0') + continue; + if (*deftype & ROFFDEF_STD) { + *deftype = ROFFDEF_STD; + return NULL; + } else { + found = 1; + break; } } } + + if (found == 0 && *deftype != ROFFDEF_ANY) { + if (*deftype & ROFFDEF_REN) { + /* + * This might still be a request, + * so do not treat it as undefined yet. + */ + *deftype = ROFFDEF_UNDEF; + return NULL; + } + + /* Using an undefined string defines it to be empty. */ + + roff_setstrn(&r->strtab, name, len, "", 0, 0); + roff_setstrn(&r->rentab, name, len, NULL, 0, 0); + } + *deftype = 0; return NULL; } @@ -3602,20 +3840,6 @@ roff_freestr(struct roffkv *r) } /* --- accessors and utility functions ------------------------------------ */ - -const struct tbl_span * -roff_span(const struct roff *r) -{ - - return r->tbl ? tbl_span(r->tbl) : NULL; -} - -const struct eqn * -roff_eqn(const struct roff *r) -{ - - return r->last_eqn ? &r->last_eqn->eqn : NULL; -} /* * Duplicate an input string, making the appropriate character