version 1.166, 2011/07/29 09:19:48 |
version 1.173, 2012/05/31 22:41:19 |
|
|
/* $Id$ */ |
/* $Id$ */ |
/* |
/* |
* Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> |
* Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> |
* Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org> |
* Copyright (c) 2010, 2011, 2012 Ingo Schwarze <schwarze@openbsd.org> |
* |
* |
* Permission to use, copy, modify, and distribute this software for any |
* Permission to use, copy, modify, and distribute this software for any |
* purpose with or without fee is hereby granted, provided that the above |
* purpose with or without fee is hereby granted, provided that the above |
|
|
/* Maximum number of nested if-else conditionals. */ |
/* Maximum number of nested if-else conditionals. */ |
#define RSTACK_MAX 128 |
#define RSTACK_MAX 128 |
|
|
|
/* Maximum number of string expansions per line, to break infinite loops. */ |
|
#define EXPAND_LIMIT 1000 |
|
|
enum rofft { |
enum rofft { |
ROFF_ad, |
ROFF_ad, |
ROFF_am, |
ROFF_am, |
|
|
unsigned int u; /* unsigned integer */ |
unsigned int u; /* unsigned integer */ |
}; |
}; |
|
|
|
/* |
|
* An incredibly-simple string buffer. |
|
*/ |
struct roffstr { |
struct roffstr { |
char *p; |
char *p; /* nil-terminated buffer */ |
size_t sz; |
size_t sz; /* saved strlen(p) */ |
}; |
}; |
|
|
/* |
/* |
* A key-value string pair with lengths. |
* A key-value roffstr pair as part of a singly-linked list. |
*/ |
*/ |
struct roffkv { |
struct roffkv { |
struct roffstr key; |
struct roffstr key; |
|
|
int rstackpos; /* position in rstack */ |
int rstackpos; /* position in rstack */ |
struct reg regs[REG__MAX]; |
struct reg regs[REG__MAX]; |
struct roffkv *strtab; /* user-defined strings & macros */ |
struct roffkv *strtab; /* user-defined strings & macros */ |
struct roffkv *chrtab; /* user-defined characters */ |
struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ |
|
struct roffstr *xtab; /* single-byte trans table (`tr') */ |
const char *current_string; /* value of last called user macro */ |
const char *current_string; /* value of last called user macro */ |
struct tbl_node *first_tbl; /* first table parsed */ |
struct tbl_node *first_tbl; /* first table parsed */ |
struct tbl_node *last_tbl; /* last table parsed */ |
struct tbl_node *last_tbl; /* last table parsed */ |
Line 169 static enum rofferr roff_cond_sub(ROFF_ARGS); |
|
Line 176 static enum rofferr roff_cond_sub(ROFF_ARGS); |
|
static enum rofferr roff_ds(ROFF_ARGS); |
static enum rofferr roff_ds(ROFF_ARGS); |
static enum roffrule roff_evalcond(const char *, int *); |
static enum roffrule roff_evalcond(const char *, int *); |
static void roff_free1(struct roff *); |
static void roff_free1(struct roff *); |
static void roff_freestr(struct roffkv **); |
static void roff_freestr(struct roffkv *); |
static char *roff_getname(struct roff *, char **, int, int); |
static char *roff_getname(struct roff *, char **, int, int); |
static const char *roff_getstrn(const struct roff *, |
static const char *roff_getstrn(const struct roff *, |
const char *, size_t); |
const char *, size_t); |
static enum rofferr roff_line_ignore(ROFF_ARGS); |
static enum rofferr roff_line_ignore(ROFF_ARGS); |
static enum rofferr roff_nr(ROFF_ARGS); |
static enum rofferr roff_nr(ROFF_ARGS); |
static void roff_openeqn(struct roff *, const char *, |
static void roff_openeqn(struct roff *, const char *, |
int, int, const char *); |
int, int, const char *); |
static enum rofft roff_parse(struct roff *, const char *, int *); |
static enum rofft roff_parse(struct roff *, const char *, int *); |
static enum rofferr roff_parsetext(char *); |
static enum rofferr roff_parsetext(char *); |
static void roff_res(struct roff *, |
static enum rofferr roff_res(struct roff *, |
char **, size_t *, int, int); |
char **, size_t *, int, int); |
static enum rofferr roff_rm(ROFF_ARGS); |
static enum rofferr roff_rm(ROFF_ARGS); |
static void roff_setstr(struct roff *, |
static void roff_setstr(struct roff *, |
Line 346 roff_free1(struct roff *r) |
|
Line 353 roff_free1(struct roff *r) |
|
{ |
{ |
struct tbl_node *t; |
struct tbl_node *t; |
struct eqn_node *e; |
struct eqn_node *e; |
|
int i; |
|
|
while (NULL != (t = r->first_tbl)) { |
while (NULL != (t = r->first_tbl)) { |
r->first_tbl = t->next; |
r->first_tbl = t->next; |
Line 364 roff_free1(struct roff *r) |
|
Line 372 roff_free1(struct roff *r) |
|
while (r->last) |
while (r->last) |
roffnode_pop(r); |
roffnode_pop(r); |
|
|
roff_freestr(&r->strtab); |
roff_freestr(r->strtab); |
roff_freestr(&r->chrtab); |
roff_freestr(r->xmbtab); |
} |
|
|
|
|
r->strtab = r->xmbtab = NULL; |
|
|
|
if (r->xtab) |
|
for (i = 0; i < 128; i++) |
|
free(r->xtab[i].p); |
|
|
|
free(r->xtab); |
|
r->xtab = NULL; |
|
} |
|
|
void |
void |
roff_reset(struct roff *r) |
roff_reset(struct roff *r) |
{ |
{ |
Line 416 roff_alloc(struct mparse *parse) |
|
Line 432 roff_alloc(struct mparse *parse) |
|
* is processed. |
* is processed. |
* This also checks the syntax of regular escapes. |
* This also checks the syntax of regular escapes. |
*/ |
*/ |
static void |
static enum rofferr |
roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos) |
roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos) |
{ |
{ |
enum mandoc_esc esc; |
enum mandoc_esc esc; |
Line 424 roff_res(struct roff *r, char **bufp, size_t *szp, int |
|
Line 440 roff_res(struct roff *r, char **bufp, size_t *szp, int |
|
const char *stnam; /* start of the name, after "[(*" */ |
const char *stnam; /* start of the name, after "[(*" */ |
const char *cp; /* end of the name, e.g. before ']' */ |
const char *cp; /* end of the name, e.g. before ']' */ |
const char *res; /* the string to be substituted */ |
const char *res; /* the string to be substituted */ |
int i, maxl; |
int i, maxl, expand_count; |
size_t nsz; |
size_t nsz; |
char *n; |
char *n; |
|
|
|
expand_count = 0; |
|
|
again: |
again: |
cp = *bufp + pos; |
cp = *bufp + pos; |
while (NULL != (cp = strchr(cp, '\\'))) { |
while (NULL != (cp = strchr(cp, '\\'))) { |
|
|
*/ |
*/ |
|
|
if ('\0' == *cp) |
if ('\0' == *cp) |
return; |
return(ROFF_CONT); |
|
|
if ('*' != *cp) { |
if ('*' != *cp) { |
res = cp; |
res = cp; |
|
|
mandoc_msg |
mandoc_msg |
(MANDOCERR_BADESCAPE, r->parse, |
(MANDOCERR_BADESCAPE, r->parse, |
ln, (int)(stesc - *bufp), NULL); |
ln, (int)(stesc - *bufp), NULL); |
return; |
return(ROFF_CONT); |
} |
} |
|
|
cp++; |
cp++; |
|
|
|
|
switch (*cp) { |
switch (*cp) { |
case ('\0'): |
case ('\0'): |
return; |
return(ROFF_CONT); |
case ('('): |
case ('('): |
cp++; |
cp++; |
maxl = 2; |
maxl = 2; |
|
|
(MANDOCERR_BADESCAPE, |
(MANDOCERR_BADESCAPE, |
r->parse, ln, |
r->parse, ln, |
(int)(stesc - *bufp), NULL); |
(int)(stesc - *bufp), NULL); |
return; |
return(ROFF_CONT); |
} |
} |
if (0 == maxl && ']' == *cp) |
if (0 == maxl && ']' == *cp) |
break; |
break; |
|
|
|
|
*bufp = n; |
*bufp = n; |
*szp = nsz; |
*szp = nsz; |
goto again; |
|
|
if (EXPAND_LIMIT >= ++expand_count) |
|
goto again; |
|
|
|
/* Just leave the string unexpanded. */ |
|
mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL); |
|
return(ROFF_IGN); |
} |
} |
|
return(ROFF_CONT); |
} |
} |
|
|
/* |
/* |
|
|
static enum rofferr |
static enum rofferr |
roff_parsetext(char *p) |
roff_parsetext(char *p) |
{ |
{ |
char l, r; |
|
size_t sz; |
size_t sz; |
const char *start; |
const char *start; |
enum mandoc_esc esc; |
enum mandoc_esc esc; |
Line 559 roff_parsetext(char *p) |
|
Line 583 roff_parsetext(char *p) |
|
continue; |
continue; |
} |
} |
|
|
l = *(p - 1); |
if (isalpha((unsigned char)p[-1]) && |
r = *(p + 1); |
isalpha((unsigned char)p[1])) |
if ('\\' != l && |
|
'\t' != r && '\t' != l && |
|
' ' != r && ' ' != l && |
|
'-' != r && '-' != l && |
|
! isdigit((unsigned char)l) && |
|
! isdigit((unsigned char)r)) |
|
*p = ASCII_HYPH; |
*p = ASCII_HYPH; |
p++; |
p++; |
} |
} |
Line 587 roff_parseln(struct roff *r, int ln, char **bufp, |
|
Line 605 roff_parseln(struct roff *r, int ln, char **bufp, |
|
* words to fill in. |
* words to fill in. |
*/ |
*/ |
|
|
roff_res(r, bufp, szp, ln, pos); |
e = roff_res(r, bufp, szp, ln, pos); |
|
if (ROFF_IGN == e) |
|
return(e); |
|
assert(ROFF_CONT == e); |
|
|
ppos = pos; |
ppos = pos; |
ctl = mandoc_getcontrol(*bufp, &pos); |
ctl = mandoc_getcontrol(*bufp, &pos); |
Line 757 roffnode_cleanscope(struct roff *r) |
|
Line 778 roffnode_cleanscope(struct roff *r) |
|
{ |
{ |
|
|
while (r->last) { |
while (r->last) { |
if (--r->last->endspan < 0) |
if (--r->last->endspan != 0) |
break; |
break; |
roffnode_pop(r); |
roffnode_pop(r); |
} |
} |
Line 1077 roff_line_ignore(ROFF_ARGS) |
|
Line 1098 roff_line_ignore(ROFF_ARGS) |
|
static enum rofferr |
static enum rofferr |
roff_cond(ROFF_ARGS) |
roff_cond(ROFF_ARGS) |
{ |
{ |
int sv; |
|
enum roffrule rule; |
|
|
|
|
roffnode_push(r, tok, NULL, ln, ppos); |
|
|
/* |
/* |
* An `.el' has no conditional body: it will consume the value |
* An `.el' has no conditional body: it will consume the value |
* of the current rstack entry set in prior `ie' calls or |
* of the current rstack entry set in prior `ie' calls or |
Line 1088 roff_cond(ROFF_ARGS) |
|
Line 1109 roff_cond(ROFF_ARGS) |
|
* If we're not an `el', however, then evaluate the conditional. |
* If we're not an `el', however, then evaluate the conditional. |
*/ |
*/ |
|
|
rule = ROFF_el == tok ? |
r->last->rule = ROFF_el == tok ? |
(r->rstackpos < 0 ? |
(r->rstackpos < 0 ? |
ROFFRULE_DENY : r->rstack[r->rstackpos--]) : |
ROFFRULE_DENY : r->rstack[r->rstackpos--]) : |
roff_evalcond(*bufp, &pos); |
roff_evalcond(*bufp, &pos); |
|
|
sv = pos; |
|
while (' ' == (*bufp)[pos]) |
|
pos++; |
|
|
|
/* |
/* |
* Roff is weird. If we have just white-space after the |
|
* conditional, it's considered the BODY and we exit without |
|
* really doing anything. Warn about this. It's probably |
|
* wrong. |
|
*/ |
|
|
|
if ('\0' == (*bufp)[pos] && sv != pos) { |
|
mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL); |
|
return(ROFF_IGN); |
|
} |
|
|
|
roffnode_push(r, tok, NULL, ln, ppos); |
|
|
|
r->last->rule = rule; |
|
|
|
/* |
|
* An if-else will put the NEGATION of the current evaluated |
* An if-else will put the NEGATION of the current evaluated |
* conditional into the stack of rules. |
* conditional into the stack of rules. |
*/ |
*/ |
Line 1135 roff_cond(ROFF_ARGS) |
|
Line 1136 roff_cond(ROFF_ARGS) |
|
r->last->rule = ROFFRULE_DENY; |
r->last->rule = ROFFRULE_DENY; |
|
|
/* |
/* |
* Determine scope. If we're invoked with "\{" trailing the |
* Determine scope. |
* conditional, then we're in a multiline scope. Else our scope |
* If there is nothing on the line after the conditional, |
* expires on the next line. |
* not even whitespace, use next-line scope. |
*/ |
*/ |
|
|
r->last->endspan = 1; |
if ('\0' == (*bufp)[pos]) { |
|
r->last->endspan = 2; |
|
goto out; |
|
} |
|
|
|
while (' ' == (*bufp)[pos]) |
|
pos++; |
|
|
|
/* An opening brace requests multiline scope. */ |
|
|
if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) { |
if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) { |
r->last->endspan = -1; |
r->last->endspan = -1; |
pos += 2; |
pos += 2; |
|
goto out; |
} |
} |
|
|
/* |
/* |
* If there are no arguments on the line, the next-line scope is |
* Anything else following the conditional causes |
* assumed. |
* single-line scope. Warn if the scope contains |
|
* nothing but trailing whitespace. |
*/ |
*/ |
|
|
if ('\0' == (*bufp)[pos]) |
if ('\0' == (*bufp)[pos]) |
return(ROFF_IGN); |
mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL); |
|
|
/* Otherwise re-run the roff parser after recalculating. */ |
r->last->endspan = 1; |
|
|
|
out: |
*offs = pos; |
*offs = pos; |
return(ROFF_RERUN); |
return(ROFF_RERUN); |
} |
} |
Line 1396 roff_tr(ROFF_ARGS) |
|
Line 1408 roff_tr(ROFF_ARGS) |
|
p--; |
p--; |
} |
} |
|
|
roff_setstrn(&r->chrtab, first, fsz, second, ssz, 0); |
if (fsz > 1) { |
|
roff_setstrn(&r->xmbtab, first, |
|
fsz, second, ssz, 0); |
|
continue; |
|
} |
|
|
|
if (NULL == r->xtab) |
|
r->xtab = mandoc_calloc |
|
(128, sizeof(struct roffstr)); |
|
|
|
free(r->xtab[(int)*first].p); |
|
r->xtab[(int)*first].p = mandoc_strndup(second, ssz); |
|
r->xtab[(int)*first].sz = ssz; |
} |
} |
|
|
return(ROFF_IGN); |
return(ROFF_IGN); |
Line 1616 roff_getstrn(const struct roff *r, const char *name, s |
|
Line 1640 roff_getstrn(const struct roff *r, const char *name, s |
|
} |
} |
|
|
static void |
static void |
roff_freestr(struct roffkv **r) |
roff_freestr(struct roffkv *r) |
{ |
{ |
struct roffkv *n, *nn; |
struct roffkv *n, *nn; |
|
|
for (n = *r; n; n = nn) { |
for (n = r; n; n = nn) { |
free(n->key.p); |
free(n->key.p); |
free(n->val.p); |
free(n->val.p); |
nn = n->next; |
nn = n->next; |
free(n); |
free(n); |
} |
} |
|
|
*r = NULL; |
|
} |
} |
|
|
const struct tbl_span * |
const struct tbl_span * |
Line 1644 roff_eqn(const struct roff *r) |
|
Line 1666 roff_eqn(const struct roff *r) |
|
return(r->last_eqn ? &r->last_eqn->eqn : NULL); |
return(r->last_eqn ? &r->last_eqn->eqn : NULL); |
} |
} |
|
|
char |
|
roff_eqndelim(const struct roff *r) |
|
{ |
|
|
|
return('\0'); |
|
} |
|
|
|
/* |
/* |
* Duplicate an input string, making the appropriate character |
* Duplicate an input string, making the appropriate character |
* conversions (as stipulated by `tr') along the way. |
* conversions (as stipulated by `tr') along the way. |
Line 1665 roff_strdup(const struct roff *r, const char *p) |
|
Line 1680 roff_strdup(const struct roff *r, const char *p) |
|
size_t ssz, sz; |
size_t ssz, sz; |
enum mandoc_esc esc; |
enum mandoc_esc esc; |
|
|
if (NULL == r->chrtab) |
if (NULL == r->xmbtab && NULL == r->xtab) |
return(mandoc_strdup(p)); |
return(mandoc_strdup(p)); |
else if ('\0' == *p) |
else if ('\0' == *p) |
return(mandoc_strdup("")); |
return(mandoc_strdup("")); |
Line 1682 roff_strdup(const struct roff *r, const char *p) |
|
Line 1697 roff_strdup(const struct roff *r, const char *p) |
|
ssz = 0; |
ssz = 0; |
|
|
while ('\0' != *p) { |
while ('\0' != *p) { |
|
if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) { |
|
sz = r->xtab[(int)*p].sz; |
|
res = mandoc_realloc(res, ssz + sz + 1); |
|
memcpy(res + ssz, r->xtab[(int)*p].p, sz); |
|
ssz += sz; |
|
p++; |
|
continue; |
|
} else if ('\\' != *p) { |
|
res = mandoc_realloc(res, ssz + 2); |
|
res[ssz++] = *p++; |
|
continue; |
|
} |
|
|
/* Search for term matches. */ |
/* Search for term matches. */ |
for (cp = r->chrtab; cp; cp = cp->next) |
for (cp = r->xmbtab; cp; cp = cp->next) |
if (0 == strncmp(p, cp->key.p, cp->key.sz)) |
if (0 == strncmp(p, cp->key.p, cp->key.sz)) |
break; |
break; |
|
|
Line 1701 roff_strdup(const struct roff *r, const char *p) |
|
Line 1729 roff_strdup(const struct roff *r, const char *p) |
|
continue; |
continue; |
} |
} |
|
|
if ('\\' == *p) { |
/* |
/* |
* Handle escapes carefully: we need to copy |
* Handle escapes carefully: we need to copy |
* over just the escape itself, or else we might |
* over just the escape itself, or else we might |
* do replacements within the escape itself. |
* do replacements within the escape itself. |
* Make sure to pass along the bogus string. |
* Make sure to pass along the bogus string. |
*/ |
*/ |
pp = p++; |
pp = p++; |
esc = mandoc_escape(&p, NULL, NULL); |
esc = mandoc_escape(&p, NULL, NULL); |
if (ESCAPE_ERROR == esc) { |
if (ESCAPE_ERROR == esc) { |
sz = strlen(pp); |
sz = strlen(pp); |
|
res = mandoc_realloc(res, ssz + sz + 1); |
|
memcpy(res + ssz, pp, sz); |
|
break; |
|
} |
|
/* |
|
* We bail out on bad escapes. |
|
* No need to warn: we already did so when |
|
* roff_res() was called. |
|
*/ |
|
sz = (int)(p - pp); |
|
res = mandoc_realloc(res, ssz + sz + 1); |
res = mandoc_realloc(res, ssz + sz + 1); |
memcpy(res + ssz, pp, sz); |
memcpy(res + ssz, pp, sz); |
ssz += sz; |
break; |
continue; |
|
} |
} |
|
/* |
/* Just append the character. */ |
* We bail out on bad escapes. |
res = mandoc_realloc(res, ssz + 2); |
* No need to warn: we already did so when |
res[ssz++] = *p++; |
* roff_res() was called. |
|
*/ |
|
sz = (int)(p - pp); |
|
res = mandoc_realloc(res, ssz + sz + 1); |
|
memcpy(res + ssz, pp, sz); |
|
ssz += sz; |
} |
} |
|
|
res[(int)ssz] = '\0'; |
res[(int)ssz] = '\0'; |