=================================================================== RCS file: /cvs/mandoc/roff.c,v retrieving revision 1.390 retrieving revision 1.396 diff -u -p -r1.390 -r1.396 --- mandoc/roff.c 2022/05/31 18:09:57 1.390 +++ mandoc/roff.c 2023/04/28 19:11:04 1.396 @@ -1,4 +1,4 @@ -/* $Id: roff.c,v 1.390 2022/05/31 18:09:57 schwarze Exp $ */ +/* $Id: roff.c,v 1.396 2023/04/28 19:11:04 schwarze Exp $ */ /* * Copyright (c) 2010-2015, 2017-2022 Ingo Schwarze * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons @@ -40,14 +40,6 @@ #include "tbl_parse.h" #include "eqn_parse.h" -/* - * ASCII_ESC is used to signal from roff_getarg() to roff_expand() - * that an escape sequence resulted from copy-in processing and - * needs to be checked or interpolated. As it is used nowhere - * else, it is defined here rather than in a header file. - */ -#define ASCII_ESC 27 - /* Maximum number of string expansions per line, to break infinite loops. */ #define EXPAND_LIMIT 1000 @@ -1375,6 +1367,7 @@ roff_expand(struct roff *r, struct buf *buf, int ln, i int iarg; /* index beginning the argument */ int iendarg; /* index right after the argument */ int iend; /* index right after the sequence */ + int isrc, idst; /* to reduce \\ and \. in names */ int deftype; /* type of definition to paste */ int argi; /* macro argument index */ int quote_args; /* true for \\$@, false for \\$* */ @@ -1410,8 +1403,8 @@ roff_expand(struct roff *r, struct buf *buf, int ln, i * it to backslashes and translate backslashes to \e. */ - if (roff_escape(buf->buf, ln, pos, - &iesc, &iarg, &iendarg, &iend) != ESCAPE_EXPAND) { + if (roff_escape(buf->buf, ln, pos, &iesc, &inam, + &iarg, &iendarg, &iend) != ESCAPE_EXPAND) { while (pos < iend) { if (buf->buf[pos] == ec) { buf->buf[pos] = '\\'; @@ -1428,14 +1421,20 @@ roff_expand(struct roff *r, struct buf *buf, int ln, i continue; } - /* - * Treat "\E" just like "\"; - * it only makes a difference in copy mode. - */ + /* Reduce \\ and \. in names. */ - inam = iesc + 1; - while (buf->buf[inam] == 'E') - inam++; + if (buf->buf[inam] == '*' || buf->buf[inam] == 'n') { + isrc = idst = iarg; + while (isrc < iendarg) { + if (isrc + 1 < iendarg && + buf->buf[isrc] == '\\' && + (buf->buf[isrc + 1] == '\\' || + buf->buf[isrc + 1] == '.')) + isrc++; + buf->buf[idst++] = buf->buf[isrc++]; + } + iendarg -= isrc - idst; + } /* Handle expansion. */ @@ -1450,7 +1449,7 @@ roff_expand(struct roff *r, struct buf *buf, int ln, i break; /* - * If not overriden, + * If not overridden, * let \*(.T through to the formatters. */ @@ -1520,6 +1519,11 @@ roff_expand(struct roff *r, struct buf *buf, int ln, i *dst++ = '"'; } continue; + case 'A': + ubuf[0] = iendarg > iarg ? '1' : '0'; + ubuf[1] = '\0'; + res = ubuf; + break; case 'B': npos = 0; ubuf[0] = iendarg > iarg && iend > iendarg && @@ -1628,8 +1632,13 @@ roff_getarg(struct roff *r, char **cpp, int ln, int *p cp++; break; case '\\': - newesc = 1; + /* + * Signal to roff_expand() that an escape + * sequence resulted from copy-in processing + * and needs to be checked or interpolated. + */ cp[-pairs] = ASCII_ESC; + newesc = 1; pairs++; cp++; break; @@ -2470,7 +2479,7 @@ roff_getnum(const char *v, int *pos, int *res, int fla * Evaluate a string comparison condition. * The first character is the delimiter. * Succeed if the string up to its second occurrence - * matches the string up to its third occurence. + * matches the string up to its third occurrence. * Advance the cursor after the third occurrence * or lacking that, to the end of the line. */ @@ -3728,7 +3737,6 @@ roff_tr(ROFF_ARGS) { const char *p, *first, *second; size_t fsz, ssz; - enum mandoc_esc esc; p = buf->buf + pos; @@ -3742,23 +3750,15 @@ roff_tr(ROFF_ARGS) first = p++; if (*first == '\\') { - esc = mandoc_escape(&p, NULL, NULL); - if (esc == ESCAPE_ERROR) { - mandoc_msg(MANDOCERR_ESC_BAD, ln, - (int)(p - buf->buf), "%s", first); + if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR) return ROFF_IGN; - } fsz = (size_t)(p - first); } second = p++; if (*second == '\\') { - esc = mandoc_escape(&p, NULL, NULL); - if (esc == ESCAPE_ERROR) { - mandoc_msg(MANDOCERR_ESC_BAD, ln, - (int)(p - buf->buf), "%s", second); + if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR) return ROFF_IGN; - } ssz = (size_t)(p - second); } else if (*second == '\0') { mandoc_msg(MANDOCERR_TR_ODD, ln, @@ -4006,7 +4006,7 @@ static size_t roff_getname(struct roff *r, char **cpp, int ln, int pos) { char *name, *cp; - size_t namesz; + int namesz, inam, iend; name = *cpp; if (*name == '\0') @@ -4014,24 +4014,46 @@ roff_getname(struct roff *r, char **cpp, int ln, int p /* Advance cp to the byte after the end of the name. */ - for (cp = name; 1; cp++) { - namesz = cp - name; + cp = name; + namesz = 0; + for (;;) { if (*cp == '\0') break; if (*cp == ' ' || *cp == '\t') { cp++; break; } - if (*cp != '\\') + if (*cp != '\\') { + if (name + namesz < cp) { + name[namesz] = *cp; + *cp = ' '; + } + namesz++; + cp++; continue; + } if (cp[1] == '{' || cp[1] == '}') break; - if (*++cp == '\\') - continue; - mandoc_msg(MANDOCERR_NAMESC, ln, pos, - "%.*s", (int)(cp - name + 1), name); - mandoc_escape((const char **)&cp, NULL, NULL); - break; + if (roff_escape(cp, 0, 0, NULL, &inam, + NULL, NULL, &iend) != ESCAPE_UNDEF) { + mandoc_msg(MANDOCERR_NAMESC, ln, pos, + "%.*s%.*s", namesz, name, iend, cp); + cp += iend; + break; + } + + /* + * In an identifier, \\, \., \G and so on + * are reduced to \, ., G and so on, + * vaguely similar to copy mode. + */ + + name[namesz++] = cp[inam]; + while (iend--) { + if (cp >= name + namesz) + *cp = ' '; + cp++; + } } /* Read past spaces. */ @@ -4349,7 +4371,7 @@ roff_getformat(const struct roff *r) * return zero and don't change the current position. * If the control character has been set with `.cc', then let that grain * precedence. - * This is slighly contrary to groff, where using the non-breaking + * This is slightly contrary to groff, where using the non-breaking * control character when `cc' has been invoked will cause the * non-breaking macro contents to be printed verbatim. */