=================================================================== RCS file: /cvs/mandoc/eqn.c,v retrieving revision 1.12 retrieving revision 1.16 diff -u -p -r1.12 -r1.16 --- mandoc/eqn.c 2011/07/21 10:24:35 1.12 +++ mandoc/eqn.c 2011/07/21 13:18:24 1.16 @@ -1,4 +1,4 @@ -/* $Id: eqn.c,v 1.12 2011/07/21 10:24:35 kristaps Exp $ */ +/* $Id: eqn.c,v 1.16 2011/07/21 13:18:24 kristaps Exp $ */ /* * Copyright (c) 2011 Kristaps Dzonsons * @@ -37,6 +37,11 @@ struct eqnpart { int (*fp)(struct eqn_node *); }; +struct eqnmark { + const char *name; + size_t sz; +}; + enum eqnpartt { EQN_DEFINE = 0, EQN_SET, @@ -44,21 +49,37 @@ enum eqnpartt { EQN__MAX }; +static void eqn_box_free(struct eqn_box *); static struct eqn_def *eqn_def_find(struct eqn_node *, const char *, size_t); static int eqn_do_define(struct eqn_node *); -static int eqn_do_ign2(struct eqn_node *); +static int eqn_do_set(struct eqn_node *); static int eqn_do_undef(struct eqn_node *); static const char *eqn_nexttok(struct eqn_node *, size_t *); -static const char *eqn_next(struct eqn_node *, char, size_t *); -static int eqn_box(struct eqn_node *); +static const char *eqn_nextrawtok(struct eqn_node *, size_t *); +static const char *eqn_next(struct eqn_node *, + char, size_t *, int); +static int eqn_box(struct eqn_node *, + struct eqn_box *, struct eqn_box **); static const struct eqnpart eqnparts[EQN__MAX] = { { "define", 6, eqn_do_define }, /* EQN_DEFINE */ - { "set", 3, eqn_do_ign2 }, /* EQN_SET */ + { "set", 3, eqn_do_set }, /* EQN_SET */ { "undef", 5, eqn_do_undef }, /* EQN_UNDEF */ }; +static const struct eqnmark eqnmarks[EQNMARK__MAX] = { + { "", 0 }, /* EQNMARK_NONE */ + { "dot", 3 }, /* EQNMARK_DOT */ + { "dotdot", 6 }, /* EQNMARK_DOTDOT */ + { "hat", 3 }, /* EQNMARK_HAT */ + { "tilde", 5 }, /* EQNMARK_TILDE */ + { "vec", 3 }, /* EQNMARK_VEC */ + { "dyad", 4 }, /* EQNMARK_DYAD */ + { "bar", 3 }, /* EQNMARK_BAR */ + { "under", 5 }, /* EQNMARK_UNDER */ +}; + /* ARGSUSED */ enum rofferr eqn_read(struct eqn_node **epp, int ln, @@ -116,29 +137,53 @@ eqn_alloc(int pos, int line, struct mparse *parse) enum rofferr eqn_end(struct eqn_node *ep) { + struct eqn_box *root, *last; int c; - /* - * Validate the expression. - * Use the grammar found in the literature. - */ + ep->eqn.root = root = + mandoc_calloc(1, sizeof(struct eqn_box)); + root->type = EQN_ROOT; if (0 == ep->sz) return(ROFF_IGN); - while (1 == (c = eqn_box(ep))) - /* Keep parsing. */ ; + /* + * Run the parser. + * If we return before reaching the end of our input, our scope + * is still open somewhere. + * If we return alright but don't have a symmetric scoping, then + * something's not right either. + * Otherwise, return the equation. + */ - return(c < 0 ? ROFF_IGN : ROFF_EQN); + if (0 == (c = eqn_box(ep, root, &last))) { + if (last != root) { + EQN_MSG(MANDOCERR_EQNSCOPE, ep); + c = 0; + } + } else if (c > 0) + EQN_MSG(MANDOCERR_EQNNSCOPE, ep); + + return(0 == c ? ROFF_EQN : ROFF_IGN); } static int -eqn_box(struct eqn_node *ep) +eqn_box(struct eqn_node *ep, struct eqn_box *last, struct eqn_box **sv) { size_t sz; const char *start; - int i; + int c, i, nextc; + struct eqn_box *bp; + /* + * Mark our last level of subexpression. + * Also mark whether that the next node should be a + * subexpression node. + */ + + *sv = last; + nextc = 1; +again: if (NULL == (start = eqn_nexttok(ep, &sz))) return(0); @@ -150,18 +195,53 @@ eqn_box(struct eqn_node *ep) if ( ! (*eqnparts[i].fp)(ep)) return(-1); - return(1); + goto again; } - ep->eqn.data = mandoc_realloc - (ep->eqn.data, ep->eqn.sz + sz + 1); + for (i = 0; i < (int)EQNMARK__MAX; i++) { + if (eqnmarks[i].sz != sz) + continue; + if (strncmp(eqnmarks[i].name, start, sz)) + continue; + last->mark = (enum eqn_markt)i; + goto again; + } - if (0 == ep->eqn.sz) - *ep->eqn.data = '\0'; + /* Exit this [hopefully] subexpression. */ - ep->eqn.sz += sz; - strlcat(ep->eqn.data, start, ep->eqn.sz + 1); - return(1); + if (sz == 1 && 0 == strncmp("}", start, 1)) + return(1); + + bp = mandoc_calloc(1, sizeof(struct eqn_box)); + if (nextc) + last->child = bp; + else + last->next = bp; + + last = bp; + + /* + * See if we're to open a new subexpression. + * If so, mark our node as such and descend. + */ + + if (sz == 1 && 0 == strncmp("{", start, 1)) { + bp->type = EQN_SUBEXPR; + c = eqn_box(ep, bp, sv); + + nextc = 0; + goto again; + } + + /* A regular text node. */ + + bp->type = EQN_TEXT; + bp->text = mandoc_malloc(sz + 1); + *bp->text = '\0'; + strlcat(bp->text, start, sz + 1); + + nextc = 0; + goto again; } void @@ -169,7 +249,7 @@ eqn_free(struct eqn_node *p) { int i; - free(p->eqn.data); + eqn_box_free(p->eqn.root); for (i = 0; i < (int)p->defsz; i++) { free(p->defs[i].key); @@ -181,15 +261,35 @@ eqn_free(struct eqn_node *p) free(p); } +static void +eqn_box_free(struct eqn_box *bp) +{ + + if (bp->child) + eqn_box_free(bp->child); + if (bp->next) + eqn_box_free(bp->next); + + free(bp->text); + free(bp); +} + static const char * +eqn_nextrawtok(struct eqn_node *ep, size_t *sz) +{ + + return(eqn_next(ep, '"', sz, 0)); +} + +static const char * eqn_nexttok(struct eqn_node *ep, size_t *sz) { - return(eqn_next(ep, '"', sz)); + return(eqn_next(ep, '"', sz, 1)); } static const char * -eqn_next(struct eqn_node *ep, char quote, size_t *sz) +eqn_next(struct eqn_node *ep, char quote, size_t *sz, int repl) { char *start, *next; int q, diff, lim; @@ -199,6 +299,17 @@ eqn_next(struct eqn_node *ep, char quote, size_t *sz) if (NULL == sz) sz = &ssz; + lim = 0; + sv = ep->cur; +again: + /* Prevent self-definitions. */ + + if (lim >= EQN_NEST_MAX) { + EQN_MSG(MANDOCERR_EQNNEST, ep); + return(NULL); + } + + ep->cur = sv; start = &ep->data[(int)ep->cur]; q = 0; @@ -210,16 +321,6 @@ eqn_next(struct eqn_node *ep, char quote, size_t *sz) q = 1; } - lim = 0; - - sv = ep->cur; -again: - if (lim >= EQN_NEST_MAX) { - EQN_MSG(MANDOCERR_EQNNEST, ep); - return(NULL); - } - - ep->cur = sv; start = &ep->data[(int)ep->cur]; next = q ? strchr(start, quote) : strchr(start, ' '); @@ -238,6 +339,11 @@ again: ep->cur += *sz; } + /* Quotes aren't expanded for values. */ + + if (q || ! repl) + return(start); + if (NULL != (def = eqn_def_find(ep, start, *sz))) { diff = def->valsz - *sz; @@ -259,13 +365,13 @@ again: } static int -eqn_do_ign2(struct eqn_node *ep) +eqn_do_set(struct eqn_node *ep) { const char *start; - if (NULL == (start = eqn_nexttok(ep, NULL))) + if (NULL == (start = eqn_nextrawtok(ep, NULL))) EQN_MSG(MANDOCERR_EQNARGS, ep); - else if (NULL == (start = eqn_nexttok(ep, NULL))) + else if (NULL == (start = eqn_nextrawtok(ep, NULL))) EQN_MSG(MANDOCERR_EQNARGS, ep); else return(1); @@ -281,7 +387,7 @@ eqn_do_define(struct eqn_node *ep) struct eqn_def *def; int i; - if (NULL == (start = eqn_nexttok(ep, &sz))) { + if (NULL == (start = eqn_nextrawtok(ep, &sz))) { EQN_MSG(MANDOCERR_EQNARGS, ep); return(0); } @@ -314,7 +420,7 @@ eqn_do_define(struct eqn_node *ep) def = &ep->defs[i]; } - start = eqn_next(ep, ep->data[(int)ep->cur], &sz); + start = eqn_next(ep, ep->data[(int)ep->cur], &sz, 0); if (NULL == start) { EQN_MSG(MANDOCERR_EQNARGS, ep); @@ -322,7 +428,7 @@ eqn_do_define(struct eqn_node *ep) } def->valsz = sz; - def->val = mandoc_realloc(ep->defs[i].val, sz + 1); + def->val = mandoc_realloc(def->val, sz + 1); memcpy(def->val, start, sz); def->val[(int)sz] = '\0'; return(1); @@ -335,7 +441,7 @@ eqn_do_undef(struct eqn_node *ep) struct eqn_def *def; size_t sz; - if (NULL == (start = eqn_nexttok(ep, &sz))) { + if (NULL == (start = eqn_nextrawtok(ep, &sz))) { EQN_MSG(MANDOCERR_EQNARGS, ep); return(0); } else if (NULL != (def = eqn_def_find(ep, start, sz)))