=================================================================== RCS file: /cvs/mandoc/roff.c,v retrieving revision 1.391 retrieving revision 1.399 diff -u -p -r1.391 -r1.399 --- mandoc/roff.c 2022/05/31 20:23:05 1.391 +++ mandoc/roff.c 2023/10/23 20:25:02 1.399 @@ -1,6 +1,6 @@ -/* $Id: roff.c,v 1.391 2022/05/31 20:23:05 schwarze Exp $ */ +/* $Id: roff.c,v 1.399 2023/10/23 20:25:02 schwarze Exp $ */ /* - * Copyright (c) 2010-2015, 2017-2022 Ingo Schwarze + * Copyright (c) 2010-2015, 2017-2023 Ingo Schwarze * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons * * Permission to use, copy, modify, and distribute this software for any @@ -40,14 +40,6 @@ #include "tbl_parse.h" #include "eqn_parse.h" -/* - * ASCII_ESC is used to signal from roff_getarg() to roff_expand() - * that an escape sequence resulted from copy-in processing and - * needs to be checked or interpolated. As it is used nowhere - * else, it is defined here rather than in a header file. - */ -#define ASCII_ESC 27 - /* Maximum number of string expansions per line, to break infinite loops. */ #define EXPAND_LIMIT 1000 @@ -1370,11 +1362,13 @@ roff_expand(struct roff *r, struct buf *buf, int ln, i const char *res; /* the string to be pasted */ const char *src; /* source for copying */ char *dst; /* destination for copying */ + enum mandoc_esc subtype; /* return value from roff_escape */ int iesc; /* index of leading escape char */ int inam; /* index of the escape name */ int iarg; /* index beginning the argument */ int iendarg; /* index right after the argument */ int iend; /* index right after the sequence */ + int isrc, idst; /* to reduce \\ and \. in names */ int deftype; /* type of definition to paste */ int argi; /* macro argument index */ int quote_args; /* true for \\$@, false for \\$* */ @@ -1394,7 +1388,7 @@ roff_expand(struct roff *r, struct buf *buf, int ln, i */ if (buf->buf[pos] != ec) { - if (ec != ASCII_ESC && buf->buf[pos] == '\\') { + if (buf->buf[pos] == '\\') { roff_expand_patch(buf, pos, "\\e", pos + 1); pos++; } @@ -1410,8 +1404,8 @@ roff_expand(struct roff *r, struct buf *buf, int ln, i * it to backslashes and translate backslashes to \e. */ - if (roff_escape(buf->buf, ln, pos, - &iesc, &iarg, &iendarg, &iend) != ESCAPE_EXPAND) { + if (roff_escape(buf->buf, ln, pos, &iesc, &inam, + &iarg, &iendarg, &iend) != ESCAPE_EXPAND) { while (pos < iend) { if (buf->buf[pos] == ec) { buf->buf[pos] = '\\'; @@ -1428,14 +1422,20 @@ roff_expand(struct roff *r, struct buf *buf, int ln, i continue; } - /* - * Treat "\E" just like "\"; - * it only makes a difference in copy mode. - */ + /* Reduce \\ and \. in names. */ - inam = iesc + 1; - while (buf->buf[inam] == 'E') - inam++; + if (buf->buf[inam] == '*' || buf->buf[inam] == 'n') { + isrc = idst = iarg; + while (isrc < iendarg) { + if (isrc + 1 < iendarg && + buf->buf[isrc] == '\\' && + (buf->buf[isrc + 1] == '\\' || + buf->buf[isrc + 1] == '.')) + isrc++; + buf->buf[idst++] = buf->buf[isrc++]; + } + iendarg -= isrc - idst; + } /* Handle expansion. */ @@ -1450,7 +1450,7 @@ roff_expand(struct roff *r, struct buf *buf, int ln, i break; /* - * If not overriden, + * If not overridden, * let \*(.T through to the formatters. */ @@ -1552,8 +1552,34 @@ roff_expand(struct roff *r, struct buf *buf, int ln, i res = ubuf; break; case 'w': - (void)snprintf(ubuf, sizeof(ubuf), - "%d", (iendarg - iarg) * 24); + rsz = 0; + subtype = ESCAPE_UNDEF; + while (iarg < iendarg) { + asz = subtype == ESCAPE_SKIPCHAR ? 0 : 1; + if (buf->buf[iarg] != '\\') { + rsz += asz; + iarg++; + continue; + } + switch ((subtype = roff_escape(buf->buf, 0, + iarg, NULL, NULL, NULL, NULL, &iarg))) { + case ESCAPE_SPECIAL: + case ESCAPE_NUMBERED: + case ESCAPE_UNICODE: + case ESCAPE_OVERSTRIKE: + case ESCAPE_UNDEF: + break; + case ESCAPE_DEVICE: + asz *= 8; + break; + case ESCAPE_EXPAND: + abort(); + default: + continue; + } + rsz += asz; + } + (void)snprintf(ubuf, sizeof(ubuf), "%d", rsz * 24); res = ubuf; break; default: @@ -1633,8 +1659,8 @@ roff_getarg(struct roff *r, char **cpp, int ln, int *p cp++; break; case '\\': + cp[-pairs] = '\\'; newesc = 1; - cp[-pairs] = ASCII_ESC; pairs++; cp++; break; @@ -1690,7 +1716,7 @@ roff_getarg(struct roff *r, char **cpp, int ln, int *p buf.buf = start; buf.sz = strlen(start) + 1; buf.next = NULL; - if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) { + if (roff_expand(r, &buf, ln, 0, '\\') == ROFF_IGN) { free(buf.buf); buf.buf = mandoc_strdup(""); } @@ -2475,7 +2501,7 @@ roff_getnum(const char *v, int *pos, int *res, int fla * Evaluate a string comparison condition. * The first character is the delimiter. * Succeed if the string up to its second occurrence - * matches the string up to its third occurence. + * matches the string up to its third occurrence. * Advance the cursor after the third occurrence * or lacking that, to the end of the line. */ @@ -3733,7 +3759,6 @@ roff_tr(ROFF_ARGS) { const char *p, *first, *second; size_t fsz, ssz; - enum mandoc_esc esc; p = buf->buf + pos; @@ -3747,23 +3772,15 @@ roff_tr(ROFF_ARGS) first = p++; if (*first == '\\') { - esc = mandoc_escape(&p, NULL, NULL); - if (esc == ESCAPE_ERROR) { - mandoc_msg(MANDOCERR_ESC_BAD, ln, - (int)(p - buf->buf), "%s", first); + if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR) return ROFF_IGN; - } fsz = (size_t)(p - first); } second = p++; if (*second == '\\') { - esc = mandoc_escape(&p, NULL, NULL); - if (esc == ESCAPE_ERROR) { - mandoc_msg(MANDOCERR_ESC_BAD, ln, - (int)(p - buf->buf), "%s", second); + if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR) return ROFF_IGN; - } ssz = (size_t)(p - second); } else if (*second == '\0') { mandoc_msg(MANDOCERR_TR_ODD, ln, @@ -4011,7 +4028,7 @@ static size_t roff_getname(struct roff *r, char **cpp, int ln, int pos) { char *name, *cp; - size_t namesz; + int namesz, inam, iend; name = *cpp; if (*name == '\0') @@ -4019,24 +4036,46 @@ roff_getname(struct roff *r, char **cpp, int ln, int p /* Advance cp to the byte after the end of the name. */ - for (cp = name; 1; cp++) { - namesz = cp - name; + cp = name; + namesz = 0; + for (;;) { if (*cp == '\0') break; if (*cp == ' ' || *cp == '\t') { cp++; break; } - if (*cp != '\\') + if (*cp != '\\') { + if (name + namesz < cp) { + name[namesz] = *cp; + *cp = ' '; + } + namesz++; + cp++; continue; + } if (cp[1] == '{' || cp[1] == '}') break; - if (*++cp == '\\') - continue; - mandoc_msg(MANDOCERR_NAMESC, ln, pos, - "%.*s", (int)(cp - name + 1), name); - mandoc_escape((const char **)&cp, NULL, NULL); - break; + if (roff_escape(cp, 0, 0, NULL, &inam, + NULL, NULL, &iend) != ESCAPE_UNDEF) { + mandoc_msg(MANDOCERR_NAMESC, ln, pos, + "%.*s%.*s", namesz, name, iend, cp); + cp += iend; + break; + } + + /* + * In an identifier, \\, \., \G and so on + * are reduced to \, ., G and so on, + * vaguely similar to copy mode. + */ + + name[namesz++] = cp[inam]; + while (iend--) { + if (cp >= name + namesz) + *cp = ' '; + cp++; + } } /* Read past spaces. */ @@ -4354,7 +4393,7 @@ roff_getformat(const struct roff *r) * return zero and don't change the current position. * If the control character has been set with `.cc', then let that grain * precedence. - * This is slighly contrary to groff, where using the non-breaking + * This is slightly contrary to groff, where using the non-breaking * control character when `cc' has been invoked will cause the * non-breaking macro contents to be printed verbatim. */