=================================================================== RCS file: /cvs/mandoc/roff_escape.c,v retrieving revision 1.6 retrieving revision 1.14 diff -u -p -r1.6 -r1.14 --- mandoc/roff_escape.c 2022/06/01 23:20:26 1.6 +++ mandoc/roff_escape.c 2022/06/08 13:23:57 1.14 @@ -1,4 +1,4 @@ -/* $OpenBSD: roff_escape.c,v 1.6 2022/06/01 23:20:26 schwarze Exp $ */ +/* $Id: roff_escape.c,v 1.14 2022/06/08 13:23:57 schwarze Exp $ */ /* * Copyright (c) 2011, 2012, 2013, 2014, 2015, 2017, 2018, 2020, 2022 * Ingo Schwarze @@ -41,7 +41,8 @@ mandoc_escape(const char **rendarg, const char **rarg, int iarg, iendarg, iend; enum mandoc_esc rval; - rval = roff_escape(--*rendarg, 0, 0, NULL, &iarg, &iendarg, &iend); + rval = roff_escape(--*rendarg, 0, 0, + NULL, NULL, &iarg, &iendarg, &iend); assert(rval != ESCAPE_EXPAND); if (rarg != NULL) *rarg = *rendarg + iarg; @@ -58,25 +59,27 @@ mandoc_escape(const char **rendarg, const char **rarg, * sequence are returned in *resc ... *rend. * Otherwise, *resc is set to aesc and the positions of the escape * sequence starting at aesc are returned. - * Diagnostic messages are generated if and only if resc != NULL, + * Diagnostic messages are generated if and only if ln != 0, * that is, if and only if called by roff_expand(). */ enum mandoc_esc roff_escape(const char *buf, const int ln, const int aesc, - int *resc, int *rarg, int *rendarg, int *rend) + int *resc, int *rnam, int *rarg, int *rendarg, int *rend) { int iesc; /* index of leading escape char */ + int inam; /* index of escape name */ int iarg; /* index beginning the argument */ int iendarg; /* index right after the argument */ int iend; /* index right after the sequence */ - int sesc, sarg, sendarg, send; /* for sub-escape */ + int sesc, snam, sarg, sendarg, send; /* for sub-escape */ + int escterm; /* whether term is escaped */ int maxl; /* expected length of the argument */ int argl; /* actual length of the argument */ int c, i; /* for \[char...] parsing */ int valid_A; /* for \A parsing */ enum mandoc_esc rval; /* return value */ + enum mandoc_esc stype; /* for sub-escape */ enum mandocerr err; /* diagnostic code */ - char esc_name; char term; /* byte terminating the argument */ /* @@ -84,21 +87,21 @@ roff_escape(const char *buf, const int ln, const int a * it only makes a difference in copy mode. */ - iesc = iarg = aesc; + iesc = inam = aesc; do { - iarg++; - } while (buf[iarg] == 'E'); + inam++; + } while (buf[inam] == 'E'); /* * Sort the following cases first by syntax category, * then by escape sequence type, and finally by ASCII code. */ - esc_name = buf[iarg]; - iendarg = iend = ++iarg; + iarg = iendarg = iend = inam + 1; maxl = INT_MAX; term = '\0'; - switch (esc_name) { + err = MANDOCERR_OK; + switch (buf[inam]) { /* Escape sequences taking no arguments at all. */ @@ -127,6 +130,7 @@ roff_escape(const char *buf, const int ln, const int a case '\0': iendarg = --iend; /* FALLTHROUGH */ + case '.': case '\\': default: iarg--; @@ -136,7 +140,6 @@ roff_escape(const char *buf, const int ln, const int a case ' ': case '\'': case '-': - case '.': case '0': case ':': case '_': @@ -205,10 +208,6 @@ roff_escape(const char *buf, const int ln, const int a term = '\b'; break; case 'C': - if (buf[iarg] != '\'') { - rval = ESCAPE_ERROR; - goto out; - } rval = ESCAPE_SPECIAL; term = '\b'; break; @@ -267,22 +266,45 @@ roff_escape(const char *buf, const int ln, const int a /* Decide how to end the argument. */ + escterm = 0; + stype = ESCAPE_EXPAND; if ((term == '\b' || (term == '\0' && maxl == INT_MAX)) && - buf[iarg] == buf[iesc] && roff_escape(buf, ln, iendarg, - &sesc, &sarg, &sendarg, &send) == ESCAPE_EXPAND) - goto out_sub; + buf[iarg] == buf[iesc]) { + stype = roff_escape(buf, ln, iendarg, + &sesc, &snam, &sarg, &sendarg, &send); + if (stype == ESCAPE_EXPAND) + goto out_sub; + } if (term == '\b') { - if ((esc_name == 'N' && isdigit((unsigned char)buf[iarg])) || - (esc_name == 'h' && strchr(" %&()*+-./0123456789:<=>", - buf[iarg]) != NULL)) { - iendarg = iend = iarg + 1; - rval = ESCAPE_ERROR; - goto out; + if (stype == ESCAPE_UNDEF) + iarg++; + if (stype != ESCAPE_EXPAND && stype != ESCAPE_UNDEF) { + if (strchr("BHLRSNhlvx", buf[inam]) != NULL && + strchr(" ,.0DLOXYZ^abdhlortuvx|~", + buf[snam]) != NULL) { + err = MANDOCERR_ESC_DELIM; + iend = send; + iarg = iendarg = sesc; + goto out; + } + escterm = 1; + iarg = send; + term = buf[snam]; + } else if (strchr("BDHLRSvxNhl", buf[inam]) != NULL && + strchr(" %&()*+-./0123456789:<=>", buf[iarg]) != NULL) { + err = MANDOCERR_ESC_DELIM; + if (rval != ESCAPE_EXPAND) + rval = ESCAPE_ERROR; + if (buf[inam] != 'D') { + iendarg = iend = iarg + 1; + goto out; + } } - term = buf[iarg++]; + if (term == '\b') + term = buf[iarg++]; } else if (term == '\0' && maxl == INT_MAX) { - if (esc_name == 'n' && (buf[iarg] == '+' || buf[iarg] == '-')) + if (buf[inam] == 'n' && (buf[iarg] == '+' || buf[iarg] == '-')) iarg++; switch (buf[iarg]) { case '(': @@ -292,6 +314,7 @@ roff_escape(const char *buf, const int ln, const int a case '[': if (buf[++iarg] == ' ') { iendarg = iend = iarg + 1; + err = MANDOCERR_ESC_ARG; rval = ESCAPE_ERROR; goto out; } @@ -309,32 +332,35 @@ roff_escape(const char *buf, const int ln, const int a iendarg = iarg; while (maxl > 0) { if (buf[iendarg] == '\0') { - /* Ignore an incomplete argument except for \w. */ - if (esc_name != 'w') + err = MANDOCERR_ESC_INCOMPLETE; + if (rval != ESCAPE_EXPAND && + rval != ESCAPE_OVERSTRIKE) + rval = ESCAPE_ERROR; + /* Usually, ignore an incomplete argument. */ + if (strchr("Aow", buf[inam]) == NULL) iendarg = iarg; break; } - if (buf[iendarg] == term) { + if (escterm == 0 && buf[iendarg] == term) { iend = iendarg + 1; break; } - if (esc_name == 'N' && - isdigit((unsigned char)buf[iendarg]) == 0) { - iend = iendarg + 1; - break; - } if (buf[iendarg] == buf[iesc]) { - switch (roff_escape(buf, ln, iendarg, - &sesc, &sarg, &sendarg, &send)) { - case ESCAPE_EXPAND: + stype = roff_escape(buf, ln, iendarg, + &sesc, &snam, &sarg, &sendarg, &send); + if (stype == ESCAPE_EXPAND) goto out_sub; - case ESCAPE_UNDEF: + iend = send; + if (escterm == 1 && + (buf[snam] == term || buf[inam] == 'N')) break; - default: + if (stype != ESCAPE_UNDEF) valid_A = 0; - break; - } - iendarg = iend = send; + iendarg = send; + } else if (buf[inam] == 'N' && + isdigit((unsigned char)buf[iendarg]) == 0) { + iend = iendarg + 1; + break; } else { if (buf[iendarg] == ' ' || buf[iendarg] == '\t') valid_A = 0; @@ -343,14 +369,11 @@ roff_escape(const char *buf, const int ln, const int a iend = ++iendarg; } } - if (resc != NULL && ((maxl != INT_MAX && maxl != 0) || - (term != '\0' && buf[iendarg] != term))) - mandoc_msg(MANDOCERR_ESC_BAD, ln, iesc, "%s", buf + iesc); /* Post-process depending on the content of the argument. */ argl = iendarg - iarg; - switch (esc_name) { + switch (buf[inam]) { case '*': if (resc == NULL && argl == 2 && buf[iarg] == '.' && buf[iarg + 1] == 'T') @@ -369,13 +392,23 @@ roff_escape(const char *buf, const int ln, const int a case '2': case '3': case '4': - rval = argl == 1 ? ESCAPE_IGNORE : ESCAPE_ERROR; + if (argl == 1) + rval = ESCAPE_IGNORE; + else { + err = MANDOCERR_ESC_ARG; + rval = ESCAPE_ERROR; + } break; case '5': - rval = buf[iarg - 1] == '[' ? ESCAPE_UNSUPP : - ESCAPE_ERROR; + if (buf[iarg - 1] == '[') + rval = ESCAPE_UNSUPP; + else { + err = MANDOCERR_ESC_ARG; + rval = ESCAPE_ERROR; + } break; default: + err = MANDOCERR_ESC_ARG; rval = ESCAPE_ERROR; break; } @@ -387,9 +420,16 @@ roff_escape(const char *buf, const int ln, const int a switch (rval) { case ESCAPE_FONT: rval = mandoc_font(buf + iarg, argl); + if (rval == ESCAPE_ERROR) + err = MANDOCERR_ESC_ARG; break; case ESCAPE_SPECIAL: + if (argl == 0) { + err = MANDOCERR_ESC_BADCHAR; + rval = ESCAPE_ERROR; + break; + } /* * The file chars.c only provides one common list of @@ -399,6 +439,7 @@ roff_escape(const char *buf, const int ln, const int a */ if (term != '\0' && argl == 1 && buf[iarg] != '-') { + err = MANDOCERR_ESC_BADCHAR; rval = ESCAPE_ERROR; break; } @@ -414,8 +455,10 @@ roff_escape(const char *buf, const int ln, const int a c = 0; for (i = iarg; i < iendarg; i++) c = 10 * c + (buf[i] - '0'); - if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff) + if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff) { + err = MANDOCERR_ESC_BADCHAR; break; + } iarg += 4; rval = ESCAPE_NUMBERED; break; @@ -431,13 +474,19 @@ roff_escape(const char *buf, const int ln, const int a if (buf[iarg] != 'u' || argl < 5 || argl > 7) break; if (argl == 7 && - (buf[iarg + 1] != '1' || buf[iarg + 2] != '0')) + (buf[iarg + 1] != '1' || buf[iarg + 2] != '0')) { + err = MANDOCERR_ESC_BADCHAR; break; - if (argl == 6 && buf[iarg + 1] == '0') + } + if (argl == 6 && buf[iarg + 1] == '0') { + err = MANDOCERR_ESC_BADCHAR; break; + } if (argl == 5 && buf[iarg + 1] == 'D' && - strchr("89ABCDEF", buf[iarg + 2]) != NULL) + strchr("89ABCDEF", buf[iarg + 2]) != NULL) { + err = MANDOCERR_ESC_BADCHAR; break; + } if ((int)strspn(buf + iarg + 1, "0123456789ABCDEFabcdef") + 1 == argl) rval = ESCAPE_UNICODE; @@ -449,19 +498,24 @@ roff_escape(const char *buf, const int ln, const int a out_sub: iesc = sesc; + inam = snam; iarg = sarg; iendarg = sendarg; iend = send; rval = ESCAPE_EXPAND; out: + if (resc != NULL) + *resc = iesc; + if (rnam != NULL) + *rnam = inam; if (rarg != NULL) *rarg = iarg; if (rendarg != NULL) *rendarg = iendarg; if (rend != NULL) *rend = iend; - if (resc == NULL) + if (ln == 0) return rval; /* @@ -469,27 +523,24 @@ out: * from the parser, not when called from the formatters. */ - *resc = iesc; switch (rval) { - case ESCAPE_ERROR: - err = MANDOCERR_ESC_BAD; - break; case ESCAPE_UNSUPP: err = MANDOCERR_ESC_UNSUPP; break; case ESCAPE_UNDEF: - if (esc_name == '\\') - return rval; - err = MANDOCERR_ESC_UNDEF; + if (buf[inam] != '\\' && buf[inam] != '.') + err = MANDOCERR_ESC_UNDEF; break; case ESCAPE_SPECIAL: if (mchars_spec2cp(buf + iarg, argl) >= 0) - return rval; - err = MANDOCERR_ESC_BAD; + err = MANDOCERR_OK; + else if (err == MANDOCERR_OK) + err = MANDOCERR_ESC_UNKCHAR; break; default: - return rval; + break; } - mandoc_msg(err, ln, iesc, "%.*s", iend - iesc, buf + iesc); + if (err != MANDOCERR_OK) + mandoc_msg(err, ln, iesc, "%.*s", iend - iesc, buf + iesc); return rval; }