version 1.1, 2022/05/19 15:37:47 |
version 1.15, 2024/05/16 21:23:00 |
|
|
/* $OpenBSD$ */ |
/* $Id$ */ |
/* |
/* |
* Copyright (c) 2011, 2012, 2013, 2014, 2015, 2017, 2018, 2020, 2022 |
* Copyright (c) 2011, 2012, 2013, 2014, 2015, 2017, 2018, 2020, 2022 |
* Ingo Schwarze <schwarze@openbsd.org> |
* Ingo Schwarze <schwarze@openbsd.org> |
Line 41 mandoc_escape(const char **rendarg, const char **rarg, |
|
Line 41 mandoc_escape(const char **rendarg, const char **rarg, |
|
int iarg, iendarg, iend; |
int iarg, iendarg, iend; |
enum mandoc_esc rval; |
enum mandoc_esc rval; |
|
|
rval = roff_escape(--*rendarg, 0, 0, NULL, &iarg, &iendarg, &iend); |
rval = roff_escape(--*rendarg, 0, 0, |
|
NULL, NULL, &iarg, &iendarg, &iend); |
assert(rval != ESCAPE_EXPAND); |
assert(rval != ESCAPE_EXPAND); |
if (rarg != NULL) |
if (rarg != NULL) |
*rarg = *rendarg + iarg; |
*rarg = *rendarg + iarg; |
Line 58 mandoc_escape(const char **rendarg, const char **rarg, |
|
Line 59 mandoc_escape(const char **rendarg, const char **rarg, |
|
* sequence are returned in *resc ... *rend. |
* sequence are returned in *resc ... *rend. |
* Otherwise, *resc is set to aesc and the positions of the escape |
* Otherwise, *resc is set to aesc and the positions of the escape |
* sequence starting at aesc are returned. |
* sequence starting at aesc are returned. |
* Diagnostic messages are generated if and only if resc != NULL, |
* Diagnostic messages are generated if and only if ln != 0, |
* that is, if and only if called by roff_expand(). |
* that is, if and only if called by roff_expand(). |
*/ |
*/ |
enum mandoc_esc |
enum mandoc_esc |
roff_escape(const char *buf, const int ln, const int aesc, |
roff_escape(const char *buf, const int ln, const int aesc, |
int *resc, int *rarg, int *rendarg, int *rend) |
int *resc, int *rnam, int *rarg, int *rendarg, int *rend) |
{ |
{ |
int iesc; /* index of leading escape char */ |
int iesc; /* index of leading escape char */ |
|
int inam; /* index of escape name */ |
int iarg; /* index beginning the argument */ |
int iarg; /* index beginning the argument */ |
int iendarg; /* index right after the argument */ |
int iendarg; /* index right after the argument */ |
int iend; /* index right after the sequence */ |
int iend; /* index right after the sequence */ |
int sesc, sarg, sendarg, send; /* for sub-escape */ |
int sesc, snam, sarg, sendarg, send; /* for sub-escape */ |
|
int escterm; /* whether term is escaped */ |
int maxl; /* expected length of the argument */ |
int maxl; /* expected length of the argument */ |
int argl; /* actual length of the argument */ |
int argl; /* actual length of the argument */ |
int c, i; /* for \[char...] parsing */ |
int c, i; /* for \[char...] parsing */ |
|
int valid_A; /* for \A parsing */ |
enum mandoc_esc rval; /* return value */ |
enum mandoc_esc rval; /* return value */ |
|
enum mandoc_esc stype; /* for sub-escape */ |
enum mandocerr err; /* diagnostic code */ |
enum mandocerr err; /* diagnostic code */ |
char esc_name; |
|
char term; /* byte terminating the argument */ |
char term; /* byte terminating the argument */ |
|
|
/* |
/* |
Line 83 roff_escape(const char *buf, const int ln, const int a |
|
Line 87 roff_escape(const char *buf, const int ln, const int a |
|
* it only makes a difference in copy mode. |
* it only makes a difference in copy mode. |
*/ |
*/ |
|
|
iesc = iarg = aesc; |
iesc = inam = aesc; |
do { |
do { |
iarg++; |
inam++; |
} while (buf[iarg] == 'E'); |
} while (buf[inam] == 'E'); |
|
|
/* |
/* |
* Sort the following cases first by syntax category, |
* Sort the following cases first by syntax category, |
* then by escape sequence type, and finally by ASCII code. |
* then by escape sequence type, and finally by ASCII code. |
*/ |
*/ |
|
|
esc_name = buf[iarg]; |
iarg = iendarg = iend = inam + 1; |
iendarg = iend = ++iarg; |
|
maxl = INT_MAX; |
maxl = INT_MAX; |
term = '\0'; |
term = '\0'; |
switch (esc_name) { |
err = MANDOCERR_OK; |
|
switch (buf[inam]) { |
|
|
/* Escape sequences taking no arguments at all. */ |
/* Escape sequences taking no arguments at all. */ |
|
|
case '!': |
case '!': |
case '?': |
case '?': |
|
case 'r': |
rval = ESCAPE_UNSUPP; |
rval = ESCAPE_UNSUPP; |
goto out; |
goto out; |
|
|
Line 114 roff_escape(const char *buf, const int ln, const int a |
|
Line 119 roff_escape(const char *buf, const int ln, const int a |
|
case '^': |
case '^': |
case 'a': |
case 'a': |
case 'd': |
case 'd': |
case 'r': |
|
case 't': |
case 't': |
case 'u': |
case 'u': |
case '{': |
case '{': |
Line 123 roff_escape(const char *buf, const int ln, const int a |
|
Line 127 roff_escape(const char *buf, const int ln, const int a |
|
rval = ESCAPE_IGNORE; |
rval = ESCAPE_IGNORE; |
goto out; |
goto out; |
|
|
|
case '\0': |
|
iendarg = --iend; |
|
/* FALLTHROUGH */ |
|
case '.': |
case '\\': |
case '\\': |
default: |
default: |
iarg--; |
iarg--; |
Line 132 roff_escape(const char *buf, const int ln, const int a |
|
Line 140 roff_escape(const char *buf, const int ln, const int a |
|
case ' ': |
case ' ': |
case '\'': |
case '\'': |
case '-': |
case '-': |
case '.': |
|
case '0': |
case '0': |
case ':': |
case ':': |
case '_': |
case '_': |
Line 157 roff_escape(const char *buf, const int ln, const int a |
|
Line 164 roff_escape(const char *buf, const int ln, const int a |
|
|
|
case '$': |
case '$': |
case '*': |
case '*': |
|
case 'V': |
|
case 'g': |
case 'n': |
case 'n': |
rval = ESCAPE_EXPAND; |
rval = ESCAPE_EXPAND; |
break; |
break; |
case 'F': |
case 'F': |
case 'M': |
case 'M': |
case 'O': |
case 'O': |
case 'V': |
|
case 'Y': |
case 'Y': |
case 'g': |
|
case 'k': |
case 'k': |
case 'm': |
case 'm': |
rval = ESCAPE_IGNORE; |
rval = ESCAPE_IGNORE; |
Line 181 roff_escape(const char *buf, const int ln, const int a |
|
Line 188 roff_escape(const char *buf, const int ln, const int a |
|
|
|
/* Quoted arguments */ |
/* Quoted arguments */ |
|
|
|
case 'A': |
case 'B': |
case 'B': |
case 'w': |
case 'w': |
rval = ESCAPE_EXPAND; |
rval = ESCAPE_EXPAND; |
term = '\b'; |
term = '\b'; |
break; |
break; |
case 'A': |
|
case 'D': |
case 'D': |
case 'H': |
case 'H': |
case 'L': |
case 'L': |
Line 201 roff_escape(const char *buf, const int ln, const int a |
|
Line 208 roff_escape(const char *buf, const int ln, const int a |
|
term = '\b'; |
term = '\b'; |
break; |
break; |
case 'C': |
case 'C': |
if (buf[iarg] != '\'') { |
|
rval = ESCAPE_ERROR; |
|
goto out; |
|
} |
|
rval = ESCAPE_SPECIAL; |
rval = ESCAPE_SPECIAL; |
term = '\b'; |
term = '\b'; |
break; |
break; |
Line 263 roff_escape(const char *buf, const int ln, const int a |
|
Line 266 roff_escape(const char *buf, const int ln, const int a |
|
|
|
/* Decide how to end the argument. */ |
/* Decide how to end the argument. */ |
|
|
|
escterm = 0; |
|
stype = ESCAPE_EXPAND; |
if ((term == '\b' || (term == '\0' && maxl == INT_MAX)) && |
if ((term == '\b' || (term == '\0' && maxl == INT_MAX)) && |
buf[iarg] == buf[iesc] && roff_escape(buf, ln, iendarg, |
buf[iarg] == buf[iesc]) { |
&sesc, &sarg, &sendarg, &send) == ESCAPE_EXPAND) |
stype = roff_escape(buf, ln, iendarg, |
goto out_sub; |
&sesc, &snam, &sarg, &sendarg, &send); |
|
if (stype == ESCAPE_EXPAND) |
|
goto out_sub; |
|
} |
|
|
if (term == '\b') { |
if (term == '\b') { |
if ((esc_name == 'N' && isdigit((unsigned char)buf[iarg])) || |
if (stype == ESCAPE_UNDEF) |
(esc_name == 'h' && strchr(" %&()*+-./0123456789:<=>", |
iarg++; |
buf[iarg]) != NULL)) { |
if (stype != ESCAPE_EXPAND && stype != ESCAPE_UNDEF) { |
iendarg = iend = iarg + 1; |
if (strchr("BHLRSNhlvx", buf[inam]) != NULL && |
rval = ESCAPE_ERROR; |
strchr(" ,.0DLOXYZ^abdhlortuvx|~", |
goto out; |
buf[snam]) != NULL) { |
|
err = MANDOCERR_ESC_DELIM; |
|
iend = send; |
|
iarg = iendarg = sesc; |
|
goto out; |
|
} |
|
escterm = 1; |
|
iarg = send; |
|
term = buf[snam]; |
|
} else if (strchr("BDHLRSvxNhl", buf[inam]) != NULL && |
|
strchr(" %&()*+-./0123456789:<=>", buf[iarg]) != NULL) { |
|
err = MANDOCERR_ESC_DELIM; |
|
if (rval != ESCAPE_EXPAND) |
|
rval = ESCAPE_ERROR; |
|
if (buf[inam] != 'D') { |
|
iendarg = iend = iarg + 1; |
|
goto out; |
|
} |
} |
} |
term = buf[iarg++]; |
if (term == '\b') |
|
term = buf[iarg++]; |
} else if (term == '\0' && maxl == INT_MAX) { |
} else if (term == '\0' && maxl == INT_MAX) { |
if (esc_name == 'n' && (buf[iarg] == '+' || buf[iarg] == '-')) |
if (buf[inam] == 'n' && (buf[iarg] == '+' || buf[iarg] == '-')) |
iarg++; |
iarg++; |
switch (buf[iarg]) { |
switch (buf[iarg]) { |
case '(': |
case '(': |
Line 288 roff_escape(const char *buf, const int ln, const int a |
|
Line 314 roff_escape(const char *buf, const int ln, const int a |
|
case '[': |
case '[': |
if (buf[++iarg] == ' ') { |
if (buf[++iarg] == ' ') { |
iendarg = iend = iarg + 1; |
iendarg = iend = iarg + 1; |
|
err = MANDOCERR_ESC_ARG; |
rval = ESCAPE_ERROR; |
rval = ESCAPE_ERROR; |
goto out; |
goto out; |
} |
} |
Line 301 roff_escape(const char *buf, const int ln, const int a |
|
Line 328 roff_escape(const char *buf, const int ln, const int a |
|
|
|
/* Advance to the end of the argument. */ |
/* Advance to the end of the argument. */ |
|
|
|
valid_A = 1; |
iendarg = iarg; |
iendarg = iarg; |
while (maxl > 0) { |
while (maxl > 0) { |
if (buf[iendarg] == '\0') { |
if (buf[iendarg] == '\0') { |
/* Ignore an incomplete argument except for \w. */ |
err = MANDOCERR_ESC_INCOMPLETE; |
if (esc_name != 'w') |
if (rval != ESCAPE_EXPAND && |
|
rval != ESCAPE_OVERSTRIKE) |
|
rval = ESCAPE_ERROR; |
|
/* Usually, ignore an incomplete argument. */ |
|
if (strchr("Aow", buf[inam]) == NULL) |
iendarg = iarg; |
iendarg = iarg; |
break; |
break; |
} |
} |
if (buf[iendarg] == term) { |
if (escterm == 0 && buf[iendarg] == term) { |
iend = iendarg + 1; |
iend = iendarg + 1; |
break; |
break; |
} |
} |
if (esc_name == 'N' && |
if (buf[iendarg] == buf[iesc]) { |
|
stype = roff_escape(buf, ln, iendarg, |
|
&sesc, &snam, &sarg, &sendarg, &send); |
|
if (stype == ESCAPE_EXPAND) |
|
goto out_sub; |
|
iend = send; |
|
if (escterm == 1 && |
|
(buf[snam] == term || buf[inam] == 'N')) |
|
break; |
|
if (stype != ESCAPE_UNDEF) |
|
valid_A = 0; |
|
iendarg = send; |
|
} else if (buf[inam] == 'N' && |
isdigit((unsigned char)buf[iendarg]) == 0) { |
isdigit((unsigned char)buf[iendarg]) == 0) { |
iend = iendarg + 1; |
iend = iendarg + 1; |
break; |
break; |
} |
|
if (buf[iendarg] == buf[iesc]) { |
|
if (roff_escape(buf, ln, iendarg, |
|
&sesc, &sarg, &sendarg, &send) == ESCAPE_EXPAND) |
|
goto out_sub; |
|
iendarg = iend = send; |
|
} else { |
} else { |
|
if (buf[iendarg] == ' ' || buf[iendarg] == '\t') |
|
valid_A = 0; |
if (maxl != INT_MAX) |
if (maxl != INT_MAX) |
maxl--; |
maxl--; |
iend = ++iendarg; |
iend = ++iendarg; |
} |
} |
} |
} |
if (resc != NULL && ((maxl != INT_MAX && maxl != 0) || |
|
(term != '\0' && buf[iendarg] != term))) |
|
mandoc_msg(MANDOCERR_ESC_BAD, ln, iesc, "%s", buf + iesc); |
|
|
|
/* Post-process depending on the content of the argument. */ |
/* Post-process depending on the content of the argument. */ |
|
|
argl = iendarg - iarg; |
argl = iendarg - iarg; |
switch (esc_name) { |
switch (buf[inam]) { |
case '*': |
case '*': |
if (resc == NULL && argl == 2 && |
if (resc == NULL && argl == 2 && |
buf[iarg] == '.' && buf[iarg + 1] == 'T') |
buf[iarg] == '.' && buf[iarg + 1] == 'T') |
rval = ESCAPE_DEVICE; |
rval = ESCAPE_DEVICE; |
break; |
break; |
|
case 'A': |
|
if (valid_A == 0) |
|
iendarg = iarg; |
|
break; |
case 'O': |
case 'O': |
switch (buf[iarg]) { |
switch (buf[iarg]) { |
case '0': |
case '0': |
Line 351 roff_escape(const char *buf, const int ln, const int a |
|
Line 392 roff_escape(const char *buf, const int ln, const int a |
|
case '2': |
case '2': |
case '3': |
case '3': |
case '4': |
case '4': |
rval = argl == 1 ? ESCAPE_IGNORE : ESCAPE_ERROR; |
if (argl == 1) |
|
rval = ESCAPE_IGNORE; |
|
else { |
|
err = MANDOCERR_ESC_ARG; |
|
rval = ESCAPE_ERROR; |
|
} |
break; |
break; |
case '5': |
case '5': |
rval = buf[iarg - 1] == '[' ? ESCAPE_UNSUPP : |
if (buf[iarg - 1] == '[') |
ESCAPE_ERROR; |
rval = ESCAPE_UNSUPP; |
|
else { |
|
err = MANDOCERR_ESC_ARG; |
|
rval = ESCAPE_ERROR; |
|
} |
break; |
break; |
default: |
default: |
|
err = MANDOCERR_ESC_ARG; |
rval = ESCAPE_ERROR; |
rval = ESCAPE_ERROR; |
break; |
break; |
} |
} |
Line 369 roff_escape(const char *buf, const int ln, const int a |
|
Line 420 roff_escape(const char *buf, const int ln, const int a |
|
switch (rval) { |
switch (rval) { |
case ESCAPE_FONT: |
case ESCAPE_FONT: |
rval = mandoc_font(buf + iarg, argl); |
rval = mandoc_font(buf + iarg, argl); |
|
if (rval == ESCAPE_ERROR) |
|
err = MANDOCERR_ESC_ARG; |
break; |
break; |
|
|
case ESCAPE_SPECIAL: |
case ESCAPE_SPECIAL: |
|
if (argl == 0) { |
|
err = MANDOCERR_ESC_BADCHAR; |
|
rval = ESCAPE_ERROR; |
|
break; |
|
} |
|
|
/* |
/* |
* The file chars.c only provides one common list of |
* The file chars.c only provides one common list of |
Line 381 roff_escape(const char *buf, const int ln, const int a |
|
Line 439 roff_escape(const char *buf, const int ln, const int a |
|
*/ |
*/ |
|
|
if (term != '\0' && argl == 1 && buf[iarg] != '-') { |
if (term != '\0' && argl == 1 && buf[iarg] != '-') { |
|
err = MANDOCERR_ESC_BADCHAR; |
rval = ESCAPE_ERROR; |
rval = ESCAPE_ERROR; |
break; |
break; |
} |
} |
Line 396 roff_escape(const char *buf, const int ln, const int a |
|
Line 455 roff_escape(const char *buf, const int ln, const int a |
|
c = 0; |
c = 0; |
for (i = iarg; i < iendarg; i++) |
for (i = iarg; i < iendarg; i++) |
c = 10 * c + (buf[i] - '0'); |
c = 10 * c + (buf[i] - '0'); |
if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff) |
if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff) { |
|
err = MANDOCERR_ESC_BADCHAR; |
break; |
break; |
|
} |
iarg += 4; |
iarg += 4; |
rval = ESCAPE_NUMBERED; |
rval = ESCAPE_NUMBERED; |
break; |
break; |
Line 406 roff_escape(const char *buf, const int ln, const int a |
|
Line 467 roff_escape(const char *buf, const int ln, const int a |
|
/* |
/* |
* Unicode escapes are defined in groff as \[u0000] |
* Unicode escapes are defined in groff as \[u0000] |
* to \[u10FFFF], where the contained value must be |
* to \[u10FFFF], where the contained value must be |
* a valid Unicode codepoint. Here, however, only |
* a valid Unicode codepoint. |
* check the length and range. |
|
*/ |
*/ |
|
|
if (buf[iarg] != 'u' || argl < 5 || argl > 7) |
if (buf[iarg] != 'u' || argl < 5 || argl > 7) |
break; |
break; |
if (argl == 7 && |
if (argl == 7 && /* beyond the Unicode range */ |
(buf[iarg + 1] != '1' || buf[iarg + 2] != '0')) |
(buf[iarg + 1] != '1' || buf[iarg + 2] != '0')) { |
|
err = MANDOCERR_ESC_BADCHAR; |
break; |
break; |
if (argl == 6 && buf[iarg + 1] == '0') |
} |
|
if (argl == 6 && buf[iarg + 1] == '0') { |
|
err = MANDOCERR_ESC_BADCHAR; |
break; |
break; |
if (argl == 5 && buf[iarg + 1] == 'D' && |
} |
strchr("89ABCDEF", buf[iarg + 2]) != NULL) |
if (argl == 5 && /* UTF-16 surrogate */ |
|
toupper((unsigned char)buf[iarg + 1]) == 'D' && |
|
strchr("89ABCDEFabcdef", buf[iarg + 2]) != NULL) { |
|
err = MANDOCERR_ESC_BADCHAR; |
break; |
break; |
|
} |
if ((int)strspn(buf + iarg + 1, "0123456789ABCDEFabcdef") |
if ((int)strspn(buf + iarg + 1, "0123456789ABCDEFabcdef") |
+ 1 == argl) |
+ 1 == argl) |
rval = ESCAPE_UNICODE; |
rval = ESCAPE_UNICODE; |
Line 431 roff_escape(const char *buf, const int ln, const int a |
|
Line 498 roff_escape(const char *buf, const int ln, const int a |
|
|
|
out_sub: |
out_sub: |
iesc = sesc; |
iesc = sesc; |
|
inam = snam; |
iarg = sarg; |
iarg = sarg; |
iendarg = sendarg; |
iendarg = sendarg; |
iend = send; |
iend = send; |
rval = ESCAPE_EXPAND; |
rval = ESCAPE_EXPAND; |
|
|
out: |
out: |
|
if (resc != NULL) |
|
*resc = iesc; |
|
if (rnam != NULL) |
|
*rnam = inam; |
if (rarg != NULL) |
if (rarg != NULL) |
*rarg = iarg; |
*rarg = iarg; |
if (rendarg != NULL) |
if (rendarg != NULL) |
*rendarg = iendarg; |
*rendarg = iendarg; |
if (rend != NULL) |
if (rend != NULL) |
*rend = iend; |
*rend = iend; |
if (resc == NULL) |
if (ln == 0) |
return rval; |
return rval; |
|
|
/* |
/* |
|
|
* from the parser, not when called from the formatters. |
* from the parser, not when called from the formatters. |
*/ |
*/ |
|
|
*resc = iesc; |
|
switch (rval) { |
switch (rval) { |
case ESCAPE_ERROR: |
|
err = MANDOCERR_ESC_BAD; |
|
break; |
|
case ESCAPE_UNSUPP: |
case ESCAPE_UNSUPP: |
err = MANDOCERR_ESC_UNSUPP; |
err = MANDOCERR_ESC_UNSUPP; |
break; |
break; |
case ESCAPE_UNDEF: |
case ESCAPE_UNDEF: |
if (esc_name == '\\') |
if (buf[inam] != '\\' && buf[inam] != '.') |
return rval; |
err = MANDOCERR_ESC_UNDEF; |
err = MANDOCERR_ESC_UNDEF; |
|
break; |
break; |
case ESCAPE_SPECIAL: |
case ESCAPE_SPECIAL: |
if (mchars_spec2cp(buf + iarg, argl) >= 0) |
if (mchars_spec2cp(buf + iarg, argl) >= 0) |
return rval; |
err = MANDOCERR_OK; |
err = MANDOCERR_ESC_BAD; |
else if (err == MANDOCERR_OK) |
|
err = MANDOCERR_ESC_UNKCHAR; |
break; |
break; |
default: |
default: |
return rval; |
break; |
} |
} |
mandoc_msg(err, ln, iesc, "%.*s", iend - iesc, buf + iesc); |
if (err != MANDOCERR_OK) |
|
mandoc_msg(err, ln, iesc, "%.*s", iend - iesc, buf + iesc); |
return rval; |
return rval; |
} |
} |