version 1.350, 2018/12/14 05:18:03 |
version 1.355, 2018/12/21 17:15:19 |
|
|
#include "tbl_parse.h" |
#include "tbl_parse.h" |
#include "eqn_parse.h" |
#include "eqn_parse.h" |
|
|
|
/* |
|
* ASCII_ESC is used to signal from roff_getarg() to roff_expand() |
|
* that an escape sequence resulted from copy-in processing and |
|
* needs to be checked or interpolated. As it is used nowhere |
|
* else, it is defined here rather than in a header file. |
|
*/ |
|
#define ASCII_ESC 27 |
|
|
/* Maximum number of string expansions per line, to break infinite loops. */ |
/* Maximum number of string expansions per line, to break infinite loops. */ |
#define EXPAND_LIMIT 1000 |
#define EXPAND_LIMIT 1000 |
|
|
|
|
}; |
}; |
|
|
struct roff { |
struct roff { |
struct mparse *parse; /* parse point */ |
|
struct roff_man *man; /* mdoc or man parser */ |
struct roff_man *man; /* mdoc or man parser */ |
struct roffnode *last; /* leaf of stack */ |
struct roffnode *last; /* leaf of stack */ |
struct mctx *mstack; /* stack of macro contexts */ |
struct mctx *mstack; /* stack of macro contexts */ |
Line 192 static int roff_evalnum(struct roff *, int, |
|
Line 199 static int roff_evalnum(struct roff *, int, |
|
static int roff_evalpar(struct roff *, int, |
static int roff_evalpar(struct roff *, int, |
const char *, int *, int *, int); |
const char *, int *, int *, int); |
static int roff_evalstrcond(const char *, int *); |
static int roff_evalstrcond(const char *, int *); |
|
static int roff_expand(struct roff *, struct buf *, |
|
int, int, char); |
static void roff_free1(struct roff *); |
static void roff_free1(struct roff *); |
static void roff_freereg(struct roffreg *); |
static void roff_freereg(struct roffreg *); |
static void roff_freestr(struct roffkv *); |
static void roff_freestr(struct roffkv *); |
Line 220 static enum roff_tok roff_parse(struct roff *, char * |
|
Line 229 static enum roff_tok roff_parse(struct roff *, char * |
|
static int roff_parsetext(struct roff *, struct buf *, |
static int roff_parsetext(struct roff *, struct buf *, |
int, int *); |
int, int *); |
static int roff_renamed(ROFF_ARGS); |
static int roff_renamed(ROFF_ARGS); |
static int roff_res(struct roff *, struct buf *, int, int); |
|
static int roff_return(ROFF_ARGS); |
static int roff_return(ROFF_ARGS); |
static int roff_rm(ROFF_ARGS); |
static int roff_rm(ROFF_ARGS); |
static int roff_rn(ROFF_ARGS); |
static int roff_rn(ROFF_ARGS); |
Line 780 roff_free(struct roff *r) |
|
Line 788 roff_free(struct roff *r) |
|
} |
} |
|
|
struct roff * |
struct roff * |
roff_alloc(struct mparse *parse, int options) |
roff_alloc(int options) |
{ |
{ |
struct roff *r; |
struct roff *r; |
|
|
r = mandoc_calloc(1, sizeof(struct roff)); |
r = mandoc_calloc(1, sizeof(struct roff)); |
r->parse = parse; |
|
r->reqtab = roffhash_alloc(0, ROFF_RENAMED); |
r->reqtab = roffhash_alloc(0, ROFF_RENAMED); |
r->options = options; |
r->options = options; |
r->format = options & (MPARSE_MDOC | MPARSE_MAN); |
r->format = options & (MPARSE_MDOC | MPARSE_MAN); |
Line 844 roff_man_free(struct roff_man *man) |
|
Line 851 roff_man_free(struct roff_man *man) |
|
} |
} |
|
|
struct roff_man * |
struct roff_man * |
roff_man_alloc(struct roff *roff, struct mparse *parse, |
roff_man_alloc(struct roff *roff, const char *os_s, int quick) |
const char *os_s, int quick) |
|
{ |
{ |
struct roff_man *man; |
struct roff_man *man; |
|
|
man = mandoc_calloc(1, sizeof(*man)); |
man = mandoc_calloc(1, sizeof(*man)); |
man->parse = parse; |
|
man->roff = roff; |
man->roff = roff; |
man->os_s = os_s; |
man->os_s = os_s; |
man->quick = quick; |
man->quick = quick; |
Line 1146 deroff(char **dest, const struct roff_node *n) |
|
Line 1151 deroff(char **dest, const struct roff_node *n) |
|
/* --- main functions of the roff parser ---------------------------------- */ |
/* --- main functions of the roff parser ---------------------------------- */ |
|
|
/* |
/* |
* In the current line, expand escape sequences that tend to get |
* In the current line, expand escape sequences that produce parsable |
* used in numerical expressions and conditional requests. |
* input text. Also check the syntax of the remaining escape sequences, |
* Also check the syntax of the remaining escape sequences. |
* which typically produce output glyphs or change formatter state. |
*/ |
*/ |
static int |
static int |
roff_res(struct roff *r, struct buf *buf, int ln, int pos) |
roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc) |
{ |
{ |
struct mctx *ctx; /* current macro call context */ |
struct mctx *ctx; /* current macro call context */ |
char ubuf[24]; /* buffer to print the number */ |
char ubuf[24]; /* buffer to print the number */ |
struct roff_node *n; /* used for header comments */ |
struct roff_node *n; /* used for header comments */ |
const char *start; /* start of the string to process */ |
const char *start; /* start of the string to process */ |
char *stesc; /* start of an escape sequence ('\\') */ |
char *stesc; /* start of an escape sequence ('\\') */ |
|
const char *esct; /* type of escape sequence */ |
char *ep; /* end of comment string */ |
char *ep; /* end of comment string */ |
const char *stnam; /* start of the name, after "[(*" */ |
const char *stnam; /* start of the name, after "[(*" */ |
const char *cp; /* end of the name, e.g. before ']' */ |
const char *cp; /* end of the name, e.g. before ']' */ |
Line 1167 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
Line 1173 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
size_t naml; /* actual length of the escape name */ |
size_t naml; /* actual length of the escape name */ |
size_t asz; /* length of the replacement */ |
size_t asz; /* length of the replacement */ |
size_t rsz; /* length of the rest of the string */ |
size_t rsz; /* length of the rest of the string */ |
enum mandoc_esc esc; /* type of the escape sequence */ |
|
int inaml; /* length returned from mandoc_escape() */ |
int inaml; /* length returned from mandoc_escape() */ |
int expand_count; /* to avoid infinite loops */ |
int expand_count; /* to avoid infinite loops */ |
int npos; /* position in numeric expression */ |
int npos; /* position in numeric expression */ |
Line 1176 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
Line 1181 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
int done; /* no more input available */ |
int done; /* no more input available */ |
int deftype; /* type of definition to paste */ |
int deftype; /* type of definition to paste */ |
int rcsid; /* kind of RCS id seen */ |
int rcsid; /* kind of RCS id seen */ |
|
enum mandocerr err; /* for escape sequence problems */ |
char sign; /* increment number register */ |
char sign; /* increment number register */ |
char term; /* character terminating the escape */ |
char term; /* character terminating the escape */ |
|
|
Line 1184 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
Line 1190 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
done = 0; |
done = 0; |
start = buf->buf + pos; |
start = buf->buf + pos; |
for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) { |
for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) { |
if (stesc[0] != r->escape || stesc[1] == '\0') |
if (stesc[0] != newesc || stesc[1] == '\0') |
continue; |
continue; |
stesc++; |
stesc++; |
if (*stesc != '"' && *stesc != '#') |
if (*stesc != '"' && *stesc != '#') |
Line 1226 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
Line 1232 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
* in the syntax tree. |
* in the syntax tree. |
*/ |
*/ |
|
|
if (r->format == 0) { |
if (newesc != ASCII_ESC && r->format == 0) { |
while (*ep == ' ' || *ep == '\t') |
while (*ep == ' ' || *ep == '\t') |
ep--; |
ep--; |
ep[1] = '\0'; |
ep[1] = '\0'; |
Line 1267 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
Line 1273 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
|
|
expand_count = 0; |
expand_count = 0; |
while (stesc >= start) { |
while (stesc >= start) { |
|
if (*stesc != newesc) { |
|
|
/* Search backwards for the next backslash. */ |
/* |
|
* If we have a non-standard escape character, |
|
* escape literal backslashes because all |
|
* processing in subsequent functions uses |
|
* the standard escaping rules. |
|
*/ |
|
|
if (*stesc != r->escape) { |
if (newesc != ASCII_ESC && *stesc == '\\') { |
if (*stesc == '\\') { |
|
*stesc = '\0'; |
*stesc = '\0'; |
buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s", |
buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s", |
buf->buf, stesc + 1) + 1; |
buf->buf, stesc + 1) + 1; |
Line 1280 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
Line 1291 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
free(buf->buf); |
free(buf->buf); |
buf->buf = nbuf; |
buf->buf = nbuf; |
} |
} |
|
|
|
/* Search backwards for the next escape. */ |
|
|
stesc--; |
stesc--; |
continue; |
continue; |
} |
} |
Line 1308 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
Line 1322 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
|
|
term = '\0'; |
term = '\0'; |
cp = stesc + 1; |
cp = stesc + 1; |
switch (*cp) { |
if (*cp == 'E') |
|
cp++; |
|
esct = cp; |
|
switch (*esct) { |
case '*': |
case '*': |
case '$': |
case '$': |
res = NULL; |
res = NULL; |
Line 1324 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
Line 1341 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
res = ubuf; |
res = ubuf; |
break; |
break; |
default: |
default: |
esc = mandoc_escape(&cp, &stnam, &inaml); |
err = MANDOCERR_OK; |
if (esc == ESCAPE_ERROR || |
switch(mandoc_escape(&cp, &stnam, &inaml)) { |
(esc == ESCAPE_SPECIAL && |
case ESCAPE_SPECIAL: |
mchars_spec2cp(stnam, inaml) < 0)) |
if (mchars_spec2cp(stnam, inaml) >= 0) |
mandoc_msg(MANDOCERR_ESC_BAD, |
break; |
ln, (int)(stesc - buf->buf), |
/* FALLTHROUGH */ |
|
case ESCAPE_ERROR: |
|
err = MANDOCERR_ESC_BAD; |
|
break; |
|
case ESCAPE_UNDEF: |
|
err = MANDOCERR_ESC_UNDEF; |
|
break; |
|
case ESCAPE_UNSUPP: |
|
err = MANDOCERR_ESC_UNSUPP; |
|
break; |
|
default: |
|
break; |
|
} |
|
if (err != MANDOCERR_OK) |
|
mandoc_msg(err, ln, (int)(stesc - buf->buf), |
"%.*s", (int)(cp - stesc), stesc); |
"%.*s", (int)(cp - stesc), stesc); |
stesc--; |
stesc--; |
continue; |
continue; |
Line 1386 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
Line 1417 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
cp++; |
cp++; |
break; |
break; |
} |
} |
if (*cp++ != '\\' || stesc[1] != 'w') { |
if (*cp++ != '\\' || *esct != 'w') { |
naml++; |
naml++; |
continue; |
continue; |
} |
} |
Line 1394 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
Line 1425 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
case ESCAPE_SPECIAL: |
case ESCAPE_SPECIAL: |
case ESCAPE_UNICODE: |
case ESCAPE_UNICODE: |
case ESCAPE_NUMBERED: |
case ESCAPE_NUMBERED: |
|
case ESCAPE_UNDEF: |
case ESCAPE_OVERSTRIKE: |
case ESCAPE_OVERSTRIKE: |
naml++; |
naml++; |
break; |
break; |
Line 1407 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
Line 1439 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
* undefined, resume searching for escapes. |
* undefined, resume searching for escapes. |
*/ |
*/ |
|
|
switch (stesc[1]) { |
switch (*esct) { |
case '*': |
case '*': |
if (arg_complete) { |
if (arg_complete) { |
deftype = ROFFDEF_USER | ROFFDEF_PRE; |
deftype = ROFFDEF_USER | ROFFDEF_PRE; |
Line 1434 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
Line 1466 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
break; |
break; |
} |
} |
ctx = r->mstack + r->mstackpos; |
ctx = r->mstack + r->mstackpos; |
npos = stesc[2] - '1'; |
npos = esct[1] - '1'; |
if (npos >= 0 && npos <= 8) { |
if (npos >= 0 && npos <= 8) { |
res = npos < ctx->argc ? |
res = npos < ctx->argc ? |
ctx->argv[npos] : ""; |
ctx->argv[npos] : ""; |
break; |
break; |
} |
} |
if (stesc[2] == '*') |
if (esct[1] == '*') |
quote_args = 0; |
quote_args = 0; |
else if (stesc[2] == '@') |
else if (esct[1] == '@') |
quote_args = 1; |
quote_args = 1; |
else { |
else { |
mandoc_msg(MANDOCERR_ARG_NONUM, ln, |
mandoc_msg(MANDOCERR_ARG_NONUM, ln, |
Line 1504 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
Line 1536 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
} |
} |
|
|
if (res == NULL) { |
if (res == NULL) { |
if (stesc[1] == '*') |
if (*esct == '*') |
mandoc_msg(MANDOCERR_STR_UNDEF, |
mandoc_msg(MANDOCERR_STR_UNDEF, |
ln, (int)(stesc - buf->buf), |
ln, (int)(stesc - buf->buf), |
"%.*s", (int)naml, stnam); |
"%.*s", (int)naml, stnam); |
Line 1532 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
Line 1564 roff_res(struct roff *r, struct buf *buf, int ln, int |
|
} |
} |
|
|
/* |
/* |
|
* Parse a quoted or unquoted roff-style request or macro argument. |
|
* Return a pointer to the parsed argument, which is either the original |
|
* pointer or advanced by one byte in case the argument is quoted. |
|
* NUL-terminate the argument in place. |
|
* Collapse pairs of quotes inside quoted arguments. |
|
* Advance the argument pointer to the next argument, |
|
* or to the NUL byte terminating the argument line. |
|
*/ |
|
char * |
|
roff_getarg(struct roff *r, char **cpp, int ln, int *pos) |
|
{ |
|
struct buf buf; |
|
char *cp, *start; |
|
int newesc, pairs, quoted, white; |
|
|
|
/* Quoting can only start with a new word. */ |
|
start = *cpp; |
|
quoted = 0; |
|
if ('"' == *start) { |
|
quoted = 1; |
|
start++; |
|
} |
|
|
|
newesc = pairs = white = 0; |
|
for (cp = start; '\0' != *cp; cp++) { |
|
|
|
/* |
|
* Move the following text left |
|
* after quoted quotes and after "\\" and "\t". |
|
*/ |
|
if (pairs) |
|
cp[-pairs] = cp[0]; |
|
|
|
if ('\\' == cp[0]) { |
|
/* |
|
* In copy mode, translate double to single |
|
* backslashes and backslash-t to literal tabs. |
|
*/ |
|
switch (cp[1]) { |
|
case 'a': |
|
case 't': |
|
cp[-pairs] = '\t'; |
|
pairs++; |
|
cp++; |
|
break; |
|
case '\\': |
|
newesc = 1; |
|
cp[-pairs] = ASCII_ESC; |
|
pairs++; |
|
cp++; |
|
break; |
|
case ' ': |
|
/* Skip escaped blanks. */ |
|
if (0 == quoted) |
|
cp++; |
|
break; |
|
default: |
|
break; |
|
} |
|
} else if (0 == quoted) { |
|
if (' ' == cp[0]) { |
|
/* Unescaped blanks end unquoted args. */ |
|
white = 1; |
|
break; |
|
} |
|
} else if ('"' == cp[0]) { |
|
if ('"' == cp[1]) { |
|
/* Quoted quotes collapse. */ |
|
pairs++; |
|
cp++; |
|
} else { |
|
/* Unquoted quotes end quoted args. */ |
|
quoted = 2; |
|
break; |
|
} |
|
} |
|
} |
|
|
|
/* Quoted argument without a closing quote. */ |
|
if (1 == quoted) |
|
mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL); |
|
|
|
/* NUL-terminate this argument and move to the next one. */ |
|
if (pairs) |
|
cp[-pairs] = '\0'; |
|
if ('\0' != *cp) { |
|
*cp++ = '\0'; |
|
while (' ' == *cp) |
|
cp++; |
|
} |
|
*pos += (int)(cp - start) + (quoted ? 1 : 0); |
|
*cpp = cp; |
|
|
|
if ('\0' == *cp && (white || ' ' == cp[-1])) |
|
mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL); |
|
|
|
start = mandoc_strdup(start); |
|
if (newesc == 0) |
|
return start; |
|
|
|
buf.buf = start; |
|
buf.sz = strlen(start) + 1; |
|
buf.next = NULL; |
|
if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) { |
|
free(buf.buf); |
|
buf.buf = mandoc_strdup(""); |
|
} |
|
return buf.buf; |
|
} |
|
|
|
|
|
/* |
* Process text streams. |
* Process text streams. |
*/ |
*/ |
static int |
static int |
Line 1625 roff_parseln(struct roff *r, int ln, struct buf *buf, |
|
Line 1769 roff_parseln(struct roff *r, int ln, struct buf *buf, |
|
|
|
/* Expand some escape sequences. */ |
/* Expand some escape sequences. */ |
|
|
e = roff_res(r, buf, ln, pos); |
e = roff_expand(r, buf, ln, pos, r->escape); |
if ((e & ROFF_MASK) == ROFF_IGN) |
if ((e & ROFF_MASK) == ROFF_IGN) |
return e; |
return e; |
assert(e == ROFF_CONT); |
assert(e == ROFF_CONT); |
Line 3148 roff_EQ(ROFF_ARGS) |
|
Line 3292 roff_EQ(ROFF_ARGS) |
|
|
|
assert(r->eqn == NULL); |
assert(r->eqn == NULL); |
if (r->last_eqn == NULL) |
if (r->last_eqn == NULL) |
r->last_eqn = eqn_alloc(r->parse); |
r->last_eqn = eqn_alloc(); |
else |
else |
eqn_reset(r->last_eqn); |
eqn_reset(r->last_eqn); |
r->eqn = r->last_eqn; |
r->eqn = r->last_eqn; |
Line 3182 roff_TS(ROFF_ARGS) |
|
Line 3326 roff_TS(ROFF_ARGS) |
|
mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS"); |
mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS"); |
tbl_end(r->tbl, 0); |
tbl_end(r->tbl, 0); |
} |
} |
r->tbl = tbl_alloc(ppos, ln, r->parse, r->last_tbl); |
r->tbl = tbl_alloc(ppos, ln, r->last_tbl); |
if (r->last_tbl == NULL) |
if (r->last_tbl == NULL) |
r->first_tbl = r->tbl; |
r->first_tbl = r->tbl; |
r->last_tbl = r->tbl; |
r->last_tbl = r->tbl; |
Line 3659 roff_userdef(ROFF_ARGS) |
|
Line 3803 roff_userdef(ROFF_ARGS) |
|
ctx->argv = mandoc_reallocarray(ctx->argv, |
ctx->argv = mandoc_reallocarray(ctx->argv, |
ctx->argsz, sizeof(*ctx->argv)); |
ctx->argsz, sizeof(*ctx->argv)); |
} |
} |
arg = mandoc_getarg(r->parse, &src, ln, &pos); |
arg = roff_getarg(r, &src, ln, &pos); |
sz = 1; /* For the terminating NUL. */ |
sz = 1; /* For the terminating NUL. */ |
for (ap = arg; *ap != '\0'; ap++) |
for (ap = arg; *ap != '\0'; ap++) |
sz += *ap == '"' ? 4 : 1; |
sz += *ap == '"' ? 4 : 1; |
Line 3672 roff_userdef(ROFF_ARGS) |
|
Line 3816 roff_userdef(ROFF_ARGS) |
|
*dst++ = *ap; |
*dst++ = *ap; |
} |
} |
*dst = '\0'; |
*dst = '\0'; |
|
free(arg); |
} |
} |
|
|
/* Replace the macro invocation by the macro definition. */ |
/* Replace the macro invocation by the macro definition. */ |
Line 4021 roff_strdup(const struct roff *r, const char *p) |
|
Line 4166 roff_strdup(const struct roff *r, const char *p) |
|
/* |
/* |
* We bail out on bad escapes. |
* We bail out on bad escapes. |
* No need to warn: we already did so when |
* No need to warn: we already did so when |
* roff_res() was called. |
* roff_expand() was called. |
*/ |
*/ |
sz = (int)(p - pp); |
sz = (int)(p - pp); |
res = mandoc_realloc(res, ssz + sz + 1); |
res = mandoc_realloc(res, ssz + sz + 1); |