=================================================================== RCS file: /cvs/mandoc/roff.c,v retrieving revision 1.301 retrieving revision 1.307 diff -u -p -r1.301 -r1.307 --- mandoc/roff.c 2017/05/08 15:34:54 1.301 +++ mandoc/roff.c 2017/06/08 19:35:51 1.307 @@ -1,4 +1,4 @@ -/* $Id: roff.c,v 1.301 2017/05/08 15:34:54 schwarze Exp $ */ +/* $Id: roff.c,v 1.307 2017/06/08 19:35:51 schwarze Exp $ */ /* * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons * Copyright (c) 2010-2015, 2017 Ingo Schwarze @@ -83,6 +83,7 @@ struct roff { struct ohash *reqtab; /* request lookup table */ struct roffreg *regtab; /* number registers */ struct roffkv *strtab; /* user-defined strings & macros */ + struct roffkv *rentab; /* renamed strings & macros */ struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ struct roffstr *xtab; /* single-byte trans table (`tr') */ const char *current_string; /* value of last called user macro */ @@ -99,6 +100,7 @@ struct roff { int format; /* current file in mdoc or man format */ int argc; /* number of args of the last macro */ char control; /* control character */ + char escape; /* escape character */ }; struct roffnode { @@ -155,6 +157,8 @@ static enum rofferr roff_cond(ROFF_ARGS); static enum rofferr roff_cond_text(ROFF_ARGS); static enum rofferr roff_cond_sub(ROFF_ARGS); static enum rofferr roff_ds(ROFF_ARGS); +static enum rofferr roff_ec(ROFF_ARGS); +static enum rofferr roff_eo(ROFF_ARGS); static enum rofferr roff_eqndelim(struct roff *, struct buf *, int); static int roff_evalcond(struct roff *r, int, char *, int *); static int roff_evalnum(struct roff *, int, @@ -172,6 +176,8 @@ static int roff_getregn(const struct roff *, const char *, size_t); static int roff_getregro(const struct roff *, const char *name); +static const char *roff_getrenn(const struct roff *, + const char *, size_t); static const char *roff_getstrn(const struct roff *, const char *, size_t); static int roff_hasregn(const struct roff *, @@ -186,9 +192,12 @@ static enum rofferr roff_nr(ROFF_ARGS); static enum rofferr roff_onearg(ROFF_ARGS); static enum roff_tok roff_parse(struct roff *, char *, int *, int, int); -static enum rofferr roff_parsetext(struct buf *, int, int *); +static enum rofferr roff_parsetext(struct roff *, struct buf *, + int, int *); +static enum rofferr roff_renamed(ROFF_ARGS); static enum rofferr roff_res(struct roff *, struct buf *, int, int); static enum rofferr roff_rm(ROFF_ARGS); +static enum rofferr roff_rn(ROFF_ARGS); static enum rofferr roff_rr(ROFF_ARGS); static void roff_setstr(struct roff *, const char *, const char *, int); @@ -212,15 +221,16 @@ static enum rofferr roff_userdef(ROFF_ARGS); #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */ const char *__roff_name[MAN_MAX + 1] = { - "br", "ft", "ll", "sp", - "ta", "ti", NULL, + "br", "ce", "ft", "ll", + "mc", "sp", "ta", "ti", + NULL, "ab", "ad", "af", "aln", "als", "am", "am1", "ami", "ami1", "as", "as1", "asciify", "backtrace", "bd", "bleedat", "blm", "box", "boxa", "bp", "BP", "break", "breakchar", "brnl", "brp", - "brpnl", "c2", "cc", "ce", + "brpnl", "c2", "cc", "cf", "cflags", "ch", "char", "chop", "class", "close", "CL", "color", "composite", "continue", "cp", @@ -246,7 +256,7 @@ const char *__roff_name[MAN_MAX + 1] = { "lc", "lc_ctype", "lds", "length", "letadj", "lf", "lg", "lhang", "linetabs", "lnr", "lnrf", "lpfx", - "ls", "lsm", "lt", "mc", + "ls", "lsm", "lt", "mediasize", "minss", "mk", "mso", "na", "ne", "nh", "nhychar", "nm", "nn", "nop", "nr", @@ -273,7 +283,7 @@ const char *__roff_name[MAN_MAX + 1] = { "warnscale", "watch", "watchlength", "watchn", "wh", "while", "write", "writec", "writem", "xflag", ".", NULL, - "text", + NULL, "text", "Dd", "Dt", "Os", "Sh", "Ss", "Pp", "D1", "Dl", "Bd", "Ed", "Bl", "El", @@ -320,8 +330,10 @@ const char *const *roff_name = __roff_name; static struct roffmac roffs[TOKEN_NONE] = { { roff_br, NULL, NULL, 0 }, /* br */ + { roff_onearg, NULL, NULL, 0 }, /* ce */ { roff_onearg, NULL, NULL, 0 }, /* ft */ { roff_onearg, NULL, NULL, 0 }, /* ll */ + { roff_onearg, NULL, NULL, 0 }, /* mc */ { roff_onearg, NULL, NULL, 0 }, /* sp */ { roff_manyarg, NULL, NULL, 0 }, /* ta */ { roff_onearg, NULL, NULL, 0 }, /* ti */ @@ -353,7 +365,6 @@ static struct roffmac roffs[TOKEN_NONE] = { { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */ { roff_unsupp, NULL, NULL, 0 }, /* c2 */ { roff_cc, NULL, NULL, 0 }, /* cc */ - { roff_line_ignore, NULL, NULL, 0 }, /* ce */ { roff_insec, NULL, NULL, 0 }, /* cf */ { roff_line_ignore, NULL, NULL, 0 }, /* cflags */ { roff_line_ignore, NULL, NULL, 0 }, /* ch */ @@ -385,13 +396,13 @@ static struct roffmac roffs[TOKEN_NONE] = { { roff_ds, NULL, NULL, 0 }, /* ds1 */ { roff_unsupp, NULL, NULL, 0 }, /* dwh */ { roff_unsupp, NULL, NULL, 0 }, /* dt */ - { roff_unsupp, NULL, NULL, 0 }, /* ec */ + { roff_ec, NULL, NULL, 0 }, /* ec */ { roff_unsupp, NULL, NULL, 0 }, /* ecr */ { roff_unsupp, NULL, NULL, 0 }, /* ecs */ { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */ { roff_unsupp, NULL, NULL, 0 }, /* em */ { roff_EN, NULL, NULL, 0 }, /* EN */ - { roff_unsupp, NULL, NULL, 0 }, /* eo */ + { roff_eo, NULL, NULL, 0 }, /* eo */ { roff_unsupp, NULL, NULL, 0 }, /* EP */ { roff_EQ, NULL, NULL, 0 }, /* EQ */ { roff_line_ignore, NULL, NULL, 0 }, /* errprint */ @@ -457,7 +468,6 @@ static struct roffmac roffs[TOKEN_NONE] = { { roff_line_ignore, NULL, NULL, 0 }, /* ls */ { roff_unsupp, NULL, NULL, 0 }, /* lsm */ { roff_line_ignore, NULL, NULL, 0 }, /* lt */ - { roff_line_ignore, NULL, NULL, 0 }, /* mc */ { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */ { roff_line_ignore, NULL, NULL, 0 }, /* minss */ { roff_line_ignore, NULL, NULL, 0 }, /* mk */ @@ -503,7 +513,7 @@ static struct roffmac roffs[TOKEN_NONE] = { { roff_line_ignore, NULL, NULL, 0 }, /* rhang */ { roff_line_ignore, NULL, NULL, 0 }, /* rj */ { roff_rm, NULL, NULL, 0 }, /* rm */ - { roff_unsupp, NULL, NULL, 0 }, /* rn */ + { roff_rn, NULL, NULL, 0 }, /* rn */ { roff_unsupp, NULL, NULL, 0 }, /* rnn */ { roff_rr, NULL, NULL, 0 }, /* rr */ { roff_line_ignore, NULL, NULL, 0 }, /* rs */ @@ -559,6 +569,7 @@ static struct roffmac roffs[TOKEN_NONE] = { { roff_insec, NULL, NULL, 0 }, /* writem */ { roff_line_ignore, NULL, NULL, 0 }, /* xflag */ { roff_cblock, NULL, NULL, 0 }, /* . */ + { roff_renamed, NULL, NULL, 0 }, { roff_userdef, NULL, NULL, 0 } }; @@ -601,6 +612,8 @@ static const struct predef predefs[PREDEFS_MAX] = { #include "predefs.in" }; +static int roffce_lines; /* number of input lines to center */ +static struct roff_node *roffce_node; /* active request */ static int roffit_lines; /* number of lines to delay */ static char *roffit_macro; /* nil-terminated macro line */ @@ -736,8 +749,9 @@ roff_free1(struct roff *r) r->regtab = NULL; roff_freestr(r->strtab); + roff_freestr(r->rentab); roff_freestr(r->xmbtab); - r->strtab = r->xmbtab = NULL; + r->strtab = r->rentab = r->xmbtab = NULL; if (r->xtab) for (i = 0; i < 128; i++) @@ -751,7 +765,12 @@ roff_reset(struct roff *r) { roff_free1(r); r->format = r->options & (MPARSE_MDOC | MPARSE_MAN); - r->control = 0; + r->control = '\0'; + r->escape = '\\'; + roffce_lines = 0; + roffce_node = NULL; + roffit_lines = 0; + roffit_macro = NULL; } void @@ -773,6 +792,7 @@ roff_alloc(struct mparse *parse, int options) r->options = options; r->format = options & (MPARSE_MDOC | MPARSE_MAN); r->rstackpos = -1; + r->escape = '\\'; return r; } @@ -1009,7 +1029,7 @@ roff_addtbl(struct roff_man *man, const struct tbl_spa struct roff_node *n; if (man->macroset == MACROSET_MAN) - man_breakscope(man, TOKEN_NONE); + man_breakscope(man, ROFF_TS); n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE); n->span = tbl; roff_node_append(man, n); @@ -1149,27 +1169,80 @@ roff_res(struct roff *r, struct buf *buf, int ln, int int expand_count; /* to avoid infinite loops */ int npos; /* position in numeric expression */ int arg_complete; /* argument not interrupted by eol */ + int done; /* no more input available */ char term; /* character terminating the escape */ - expand_count = 0; + /* Search forward for comments. */ + + done = 0; start = buf->buf + pos; - stesc = strchr(start, '\0') - 1; - while (stesc-- > start) { + for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) { + if (stesc[0] != r->escape || stesc[1] == '\0') + continue; + stesc++; + if (*stesc != '"' && *stesc != '#') + continue; + cp = strchr(stesc--, '\0') - 1; + if (*cp == '\n') { + done = 1; + cp--; + } + if (*cp == ' ' || *cp == '\t') + mandoc_msg(MANDOCERR_SPACE_EOL, r->parse, + ln, cp - buf->buf, NULL); + while (stesc > start && stesc[-1] == ' ') + stesc--; + *stesc = '\0'; + break; + } + if (stesc == start) + return ROFF_CONT; + stesc--; + /* Notice the end of the input. */ + + if (*stesc == '\n') { + *stesc-- = '\0'; + done = 1; + } + + expand_count = 0; + while (stesc >= start) { + /* Search backwards for the next backslash. */ - if (*stesc != '\\') + if (*stesc != r->escape) { + if (*stesc == '\\') { + *stesc = '\0'; + buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s", + buf->buf, stesc + 1) + 1; + start = nbuf + pos; + stesc = nbuf + (stesc - buf->buf); + free(buf->buf); + buf->buf = nbuf; + } + stesc--; continue; + } /* If it is escaped, skip it. */ for (cp = stesc - 1; cp >= start; cp--) - if (*cp != '\\') + if (*cp != r->escape) break; if ((stesc - cp) % 2 == 0) { - stesc = (char *)cp; + while (stesc > cp) + *stesc-- = '\\'; continue; + } else if (stesc[1] != '\0') { + *stesc = '\\'; + } else { + *stesc-- = '\0'; + if (done) + continue; + else + return ROFF_APPEND; } /* Decide whether to expand or to check only. */ @@ -1195,6 +1268,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int mandoc_vmsg(MANDOCERR_ESC_BAD, r->parse, ln, (int)(stesc - buf->buf), "%.*s", (int)(cp - stesc), stesc); + stesc--; continue; } @@ -1328,7 +1402,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int * Process text streams. */ static enum rofferr -roff_parsetext(struct buf *buf, int pos, int *offs) +roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs) { size_t sz; const char *start; @@ -1350,6 +1424,16 @@ roff_parsetext(struct buf *buf, int pos, int *offs) } else if (roffit_lines > 1) --roffit_lines; + if (roffce_node != NULL && buf->buf[pos] != '\0') { + if (roffce_lines < 1) { + r->man->last = roffce_node; + r->man->next = ROFF_NEXT_SIBLING; + roffce_lines = 0; + roffce_node = NULL; + } else + roffce_lines--; + } + /* Convert all breakable hyphens into ASCII_HYPH. */ start = p = buf->buf + pos; @@ -1409,7 +1493,7 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, /* Expand some escape sequences. */ e = roff_res(r, buf, ln, pos); - if (e == ROFF_IGN) + if (e == ROFF_IGN || e == ROFF_APPEND) return e; assert(e == ROFF_CONT); @@ -1435,7 +1519,7 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, if (r->tbl != NULL && ( ! ctl || buf->buf[pos] == '\0')) return tbl_read(r->tbl, ln, buf->buf, ppos); if ( ! ctl) - return roff_parsetext(buf, pos, offs); + return roff_parsetext(r, buf, pos, offs); /* Skip empty request lines. */ @@ -1476,6 +1560,16 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, return tbl_read(r->tbl, ln, buf->buf, pos); } + /* For now, let high level macros abort .ce mode. */ + + if (ctl && roffce_node != NULL && + (t == TOKEN_NONE || t == ROFF_EQ || t == ROFF_TS)) { + r->man->last = roffce_node; + r->man->next = ROFF_NEXT_SIBLING; + roffce_lines = 0; + roffce_node = NULL; + } + /* * This is neither a roff request nor a user-defined macro. * Let the standard macro set parsers handle it. @@ -1531,8 +1625,10 @@ roff_parse(struct roff *r, char *buf, int *pos, int ln mac = cp; maclen = roff_getname(r, &cp, ln, ppos); - t = (r->current_string = roff_getstrn(r, mac, maclen)) - ? ROFF_USERDEF : roffhash_find(r->reqtab, mac, maclen); + t = (r->current_string = roff_getstrn(r, mac, maclen)) ? + ROFF_USERDEF : + (r->current_string = roff_getrenn(r, mac, maclen)) ? + ROFF_RENAMED : roffhash_find(r->reqtab, mac, maclen); if (t != TOKEN_NONE) *pos = cp - buf; @@ -2777,7 +2873,17 @@ roff_onearg(ROFF_ARGS) { struct roff_node *n; char *cp; + int npos; + if (r->man->flags & (MAN_BLINE | MAN_ELINE) && + (tok == ROFF_sp || tok == ROFF_ti)) + man_breakscope(r->man, tok); + + if (tok == ROFF_ce && roffce_node != NULL) { + r->man->last = roffce_node; + r->man->next = ROFF_NEXT_SIBLING; + } + roff_elem_alloc(r->man, ln, ppos, tok); n = r->man->last; @@ -2794,8 +2900,29 @@ roff_onearg(ROFF_ARGS) roff_word_alloc(r->man, ln, pos, buf->buf + pos); } - n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; - r->man->last = n; + if (tok == ROFF_ce) { + if (r->man->last->tok == ROFF_ce) { + roff_word_alloc(r->man, ln, pos, "1"); + r->man->last->flags |= NODE_NOSRC; + } + npos = 0; + if (roff_evalnum(r, ln, r->man->last->string, &npos, + &roffce_lines, 0) == 0) { + mandoc_vmsg(MANDOCERR_CE_NONUM, + r->parse, ln, pos, "ce %s", buf->buf + pos); + roffce_lines = 1; + } + if (roffce_lines < 1) { + r->man->last = r->man->last->parent; + roffce_node = NULL; + roffce_lines = 0; + } else + roffce_node = r->man->last->parent; + } else { + n->flags |= NODE_VALID | NODE_ENDED; + r->man->last = n; + } + n->flags |= NODE_LINE; r->man->next = ROFF_NEXT_SIBLING; return ROFF_IGN; } @@ -2826,6 +2953,8 @@ roff_manyarg(ROFF_ARGS) static enum rofferr roff_br(ROFF_ARGS) { + if (r->man->flags & (MAN_BLINE | MAN_ELINE)) + man_breakscope(r->man, ROFF_br); roff_elem_alloc(r->man, ln, ppos, ROFF_br); if (buf->buf[pos] != '\0') mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos, @@ -2843,7 +2972,7 @@ roff_cc(ROFF_ARGS) p = buf->buf + pos; if (*p == '\0' || (r->control = *p++) == '.') - r->control = 0; + r->control = '\0'; if (*p != '\0') mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse, @@ -2853,6 +2982,33 @@ roff_cc(ROFF_ARGS) } static enum rofferr +roff_ec(ROFF_ARGS) +{ + const char *p; + + p = buf->buf + pos; + if (*p == '\0') + r->escape = '\\'; + else { + r->escape = *p; + if (*++p != '\0') + mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse, + ln, p - buf->buf, "ec ... %s", p); + } + return ROFF_IGN; +} + +static enum rofferr +roff_eo(ROFF_ARGS) +{ + r->escape = '\0'; + if (buf->buf[pos] != '\0') + mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, + ln, pos, "eo %s", buf->buf + pos); + return ROFF_IGN; +} + +static enum rofferr roff_tr(ROFF_ARGS) { const char *p, *first, *second; @@ -2915,6 +3071,56 @@ roff_tr(ROFF_ARGS) } static enum rofferr +roff_rn(ROFF_ARGS) +{ + const char *value; + char *oldn, *newn, *end; + size_t oldsz, newsz; + + oldn = newn = buf->buf + pos; + if (*oldn == '\0') + return ROFF_IGN; + + oldsz = roff_getname(r, &newn, ln, pos); + if (oldn[oldsz] == '\\' || *newn == '\0') + return ROFF_IGN; + + end = newn; + newsz = roff_getname(r, &end, ln, newn - buf->buf); + if (newsz == 0) + return ROFF_IGN; + + /* + * Rename a user-defined macro bearing the old name, + * overriding an existing renamed high-level macro + * bearing the new name, if that exists. + */ + + if ((value = roff_getstrn(r, oldn, oldsz)) != NULL) { + roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); + roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0); + roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); + return ROFF_IGN; + } + + /* + * Rename a high-level macro bearing the old name, + * either renaming it a second time if it was already + * renamed before, or renaming it for the first time. + * In both cases, override an existing user-defined + * macro bearing the new name, if that exists. + */ + + if ((value = roff_getrenn(r, oldn, oldsz)) != NULL) { + roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0); + roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0); + } else + roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0); + roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); + return ROFF_IGN; +} + +static enum rofferr roff_so(ROFF_ARGS) { char *name, *cp; @@ -3088,6 +3294,22 @@ roff_userdef(ROFF_ARGS) ROFF_REPARSE : ROFF_APPEND; } +/* + * Calling a high-level macro that was renamed with .rn. + * r->current_string has already been set up by roff_parse(). + */ +static enum rofferr +roff_renamed(ROFF_ARGS) +{ + char *nbuf; + + buf->sz = mandoc_asprintf(&nbuf, ".%s %s", r->current_string, + buf->buf + pos) + 1; + free(buf->buf); + buf->buf = nbuf; + return ROFF_CONT; +} + static size_t roff_getname(struct roff *r, char **cpp, int ln, int pos) { @@ -3233,6 +3455,23 @@ roff_getstrn(const struct roff *r, const char *name, s return NULL; } +/* + * Check whether *name is the renamed name of a high-level macro. + * Return the standard name, or NULL if it is not. + */ +static const char * +roff_getrenn(const struct roff *r, const char *name, size_t len) +{ + const struct roffkv *n; + + for (n = r->rentab; n; n = n->next) + if (0 == strncmp(name, n->key.p, len) && + '\0' == n->key.p[(int)len]) + return n->val.p; + + return NULL; +} + static void roff_freestr(struct roffkv *r) { @@ -3379,9 +3618,9 @@ roff_getcontrol(const struct roff *r, const char *cp, pos = *ppos; - if (0 != r->control && cp[pos] == r->control) + if (r->control != '\0' && cp[pos] == r->control) pos++; - else if (0 != r->control) + else if (r->control != '\0') return 0; else if ('\\' == cp[pos] && '.' == cp[pos + 1]) pos += 2;