=================================================================== RCS file: /cvs/mandoc/eqn.c,v retrieving revision 1.25 retrieving revision 1.51 diff -u -p -r1.25 -r1.51 --- mandoc/eqn.c 2011/07/22 10:36:58 1.25 +++ mandoc/eqn.c 2014/10/10 14:27:46 1.51 @@ -1,6 +1,6 @@ -/* $Id: eqn.c,v 1.25 2011/07/22 10:36:58 kristaps Exp $ */ +/* $Id: eqn.c,v 1.51 2014/10/10 14:27:46 schwarze Exp $ */ /* - * Copyright (c) 2011 Kristaps Dzonsons + * Copyright (c) 2011, 2014 Kristaps Dzonsons * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -14,10 +14,10 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#ifdef HAVE_CONFIG_H #include "config.h" -#endif +#include + #include #include #include @@ -26,101 +26,249 @@ #include #include "mandoc.h" +#include "mandoc_aux.h" #include "libmandoc.h" #include "libroff.h" +#define EQN_MSG(t, x) \ + mandoc_msg((t), (x)->parse, (x)->eqn.ln, (x)->eqn.pos, NULL) #define EQN_NEST_MAX 128 /* maximum nesting of defines */ -#define EQN_MSG(t, x) mandoc_msg((t), (x)->parse, (x)->eqn.ln, (x)->eqn.pos, NULL) - -enum eqn_rest { - EQN_DESCOPE, - EQN_ERR, - EQN_OK, - EQN_EOF -}; - -struct eqnstr { - const char *name; - size_t sz; -}; - #define STRNEQ(p1, sz1, p2, sz2) \ ((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1))) #define EQNSTREQ(x, p, sz) \ STRNEQ((x)->name, (x)->sz, (p), (sz)) -struct eqnpart { - struct eqnstr str; - int (*fp)(struct eqn_node *); +enum eqn_tok { + EQN_TOK_DYAD = 0, + EQN_TOK_VEC, + EQN_TOK_UNDER, + EQN_TOK_BAR, + EQN_TOK_TILDE, + EQN_TOK_HAT, + EQN_TOK_DOT, + EQN_TOK_DOTDOT, + EQN_TOK_FWD, + EQN_TOK_BACK, + EQN_TOK_DOWN, + EQN_TOK_UP, + EQN_TOK_FAT, + EQN_TOK_ROMAN, + EQN_TOK_ITALIC, + EQN_TOK_BOLD, + EQN_TOK_SIZE, + EQN_TOK_SUB, + EQN_TOK_SUP, + EQN_TOK_SQRT, + EQN_TOK_OVER, + EQN_TOK_FROM, + EQN_TOK_TO, + EQN_TOK_BRACE_OPEN, + EQN_TOK_BRACE_CLOSE, + EQN_TOK_GSIZE, + EQN_TOK_GFONT, + EQN_TOK_MARK, + EQN_TOK_LINEUP, + EQN_TOK_LEFT, + EQN_TOK_RIGHT, + EQN_TOK_PILE, + EQN_TOK_LPILE, + EQN_TOK_RPILE, + EQN_TOK_CPILE, + EQN_TOK_MATRIX, + EQN_TOK_CCOL, + EQN_TOK_LCOL, + EQN_TOK_RCOL, + EQN_TOK_DELIM, + EQN_TOK_DEFINE, + EQN_TOK_TDEFINE, + EQN_TOK_NDEFINE, + EQN_TOK_UNDEF, + EQN_TOK_EOF, + EQN_TOK_ABOVE, + EQN_TOK__MAX }; -enum eqnpartt { - EQN_DEFINE = 0, - EQN_SET, - EQN_UNDEF, - EQN__MAX +static const char *eqn_toks[EQN_TOK__MAX] = { + "dyad", /* EQN_TOK_DYAD */ + "vec", /* EQN_TOK_VEC */ + "under", /* EQN_TOK_UNDER */ + "bar", /* EQN_TOK_BAR */ + "tilde", /* EQN_TOK_TILDE */ + "hat", /* EQN_TOK_HAT */ + "dot", /* EQN_TOK_DOT */ + "dotdot", /* EQN_TOK_DOTDOT */ + "fwd", /* EQN_TOK_FWD * */ + "back", /* EQN_TOK_BACK */ + "down", /* EQN_TOK_DOWN */ + "up", /* EQN_TOK_UP */ + "fat", /* EQN_TOK_FAT */ + "roman", /* EQN_TOK_ROMAN */ + "italic", /* EQN_TOK_ITALIC */ + "bold", /* EQN_TOK_BOLD */ + "size", /* EQN_TOK_SIZE */ + "sub", /* EQN_TOK_SUB */ + "sup", /* EQN_TOK_SUP */ + "sqrt", /* EQN_TOK_SQRT */ + "over", /* EQN_TOK_OVER */ + "from", /* EQN_TOK_FROM */ + "to", /* EQN_TOK_TO */ + "{", /* EQN_TOK_BRACE_OPEN */ + "}", /* EQN_TOK_BRACE_CLOSE */ + "gsize", /* EQN_TOK_GSIZE */ + "gfont", /* EQN_TOK_GFONT */ + "mark", /* EQN_TOK_MARK */ + "lineup", /* EQN_TOK_LINEUP */ + "left", /* EQN_TOK_LEFT */ + "right", /* EQN_TOK_RIGHT */ + "pile", /* EQN_TOK_PILE */ + "lpile", /* EQN_TOK_LPILE */ + "rpile", /* EQN_TOK_RPILE */ + "cpile", /* EQN_TOK_CPILE */ + "matrix", /* EQN_TOK_MATRIX */ + "ccol", /* EQN_TOK_CCOL */ + "lcol", /* EQN_TOK_LCOL */ + "rcol", /* EQN_TOK_RCOL */ + "delim", /* EQN_TOK_DELIM */ + "define", /* EQN_TOK_DEFINE */ + "tdefine", /* EQN_TOK_TDEFINE */ + "ndefine", /* EQN_TOK_NDEFINE */ + "undef", /* EQN_TOK_UNDEF */ + NULL, /* EQN_TOK_EOF */ + "above", /* EQN_TOK_ABOVE */ }; -static enum eqn_rest eqn_box(struct eqn_node *, struct eqn_box *); -static struct eqn_box *eqn_box_alloc(struct eqn_box *); -static void eqn_box_free(struct eqn_box *); -static struct eqn_def *eqn_def_find(struct eqn_node *, - const char *, size_t); -static int eqn_do_define(struct eqn_node *); -static int eqn_do_set(struct eqn_node *); -static int eqn_do_undef(struct eqn_node *); -static enum eqn_rest eqn_eqn(struct eqn_node *, struct eqn_box *); -static enum eqn_rest eqn_list(struct eqn_node *, struct eqn_box *); -static const char *eqn_nexttok(struct eqn_node *, size_t *); -static const char *eqn_nextrawtok(struct eqn_node *, size_t *); -static const char *eqn_next(struct eqn_node *, - char, size_t *, int); -static void eqn_rewind(struct eqn_node *); - -static const struct eqnpart eqnparts[EQN__MAX] = { - { { "define", 6 }, eqn_do_define }, /* EQN_DEFINE */ - { { "set", 3 }, eqn_do_set }, /* EQN_SET */ - { { "undef", 5 }, eqn_do_undef }, /* EQN_UNDEF */ +enum eqn_symt { + EQNSYM_alpha, + EQNSYM_beta, + EQNSYM_chi, + EQNSYM_delta, + EQNSYM_epsilon, + EQNSYM_eta, + EQNSYM_gamma, + EQNSYM_iota, + EQNSYM_kappa, + EQNSYM_lambda, + EQNSYM_mu, + EQNSYM_nu, + EQNSYM_omega, + EQNSYM_omicron, + EQNSYM_phi, + EQNSYM_pi, + EQNSYM_ps, + EQNSYM_rho, + EQNSYM_sigma, + EQNSYM_tau, + EQNSYM_theta, + EQNSYM_upsilon, + EQNSYM_xi, + EQNSYM_zeta, + EQNSYM_DELTA, + EQNSYM_GAMMA, + EQNSYM_LAMBDA, + EQNSYM_OMEGA, + EQNSYM_PHI, + EQNSYM_PI, + EQNSYM_PSI, + EQNSYM_SIGMA, + EQNSYM_THETA, + EQNSYM_UPSILON, + EQNSYM_XI, + EQNSYM_inter, + EQNSYM_union, + EQNSYM_prod, + EQNSYM_int, + EQNSYM_sum, + EQNSYM_grad, + EQNSYM_del, + EQNSYM_times, + EQNSYM_cdot, + EQNSYM_nothing, + EQNSYM_approx, + EQNSYM_prime, + EQNSYM_half, + EQNSYM_partial, + EQNSYM_inf, + EQNSYM_muchgreat, + EQNSYM_muchless, + EQNSYM_larrow, + EQNSYM_rarrow, + EQNSYM_pm, + EQNSYM_nequal, + EQNSYM_equiv, + EQNSYM_lessequal, + EQNSYM_moreequal, + EQNSYM__MAX }; -static const struct eqnstr eqnmarks[EQNMARK__MAX] = { - { "", 0 }, /* EQNMARK_NONE */ - { "dot", 3 }, /* EQNMARK_DOT */ - { "dotdot", 6 }, /* EQNMARK_DOTDOT */ - { "hat", 3 }, /* EQNMARK_HAT */ - { "tilde", 5 }, /* EQNMARK_TILDE */ - { "vec", 3 }, /* EQNMARK_VEC */ - { "dyad", 4 }, /* EQNMARK_DYAD */ - { "bar", 3 }, /* EQNMARK_BAR */ - { "under", 5 }, /* EQNMARK_UNDER */ +struct eqnsym { + const char *str; + const char *sym; }; -static const struct eqnstr eqnfonts[EQNFONT__MAX] = { - { "", 0 }, /* EQNFONT_NONE */ - { "roman", 5 }, /* EQNFONT_ROMAN */ - { "bold", 4 }, /* EQNFONT_BOLD */ - { "italic", 6 }, /* EQNFONT_ITALIC */ +static const struct eqnsym eqnsyms[EQNSYM__MAX] = { + { "alpha", "*a" }, /* EQNSYM_alpha */ + { "beta", "*b" }, /* EQNSYM_beta */ + { "chi", "*x" }, /* EQNSYM_chi */ + { "delta", "*d" }, /* EQNSYM_delta */ + { "epsilon", "*e" }, /* EQNSYM_epsilon */ + { "eta", "*y" }, /* EQNSYM_eta */ + { "gamma", "*g" }, /* EQNSYM_gamma */ + { "iota", "*i" }, /* EQNSYM_iota */ + { "kappa", "*k" }, /* EQNSYM_kappa */ + { "lambda", "*l" }, /* EQNSYM_lambda */ + { "mu", "*m" }, /* EQNSYM_mu */ + { "nu", "*n" }, /* EQNSYM_nu */ + { "omega", "*w" }, /* EQNSYM_omega */ + { "omicron", "*o" }, /* EQNSYM_omicron */ + { "phi", "*f" }, /* EQNSYM_phi */ + { "pi", "*p" }, /* EQNSYM_pi */ + { "psi", "*q" }, /* EQNSYM_psi */ + { "rho", "*r" }, /* EQNSYM_rho */ + { "sigma", "*s" }, /* EQNSYM_sigma */ + { "tau", "*t" }, /* EQNSYM_tau */ + { "theta", "*h" }, /* EQNSYM_theta */ + { "upsilon", "*u" }, /* EQNSYM_upsilon */ + { "xi", "*c" }, /* EQNSYM_xi */ + { "zeta", "*z" }, /* EQNSYM_zeta */ + { "DELTA", "*D" }, /* EQNSYM_DELTA */ + { "GAMMA", "*G" }, /* EQNSYM_GAMMA */ + { "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */ + { "OMEGA", "*W" }, /* EQNSYM_OMEGA */ + { "PHI", "*F" }, /* EQNSYM_PHI */ + { "PI", "*P" }, /* EQNSYM_PI */ + { "PSI", "*Q" }, /* EQNSYM_PSI */ + { "SIGMA", "*S" }, /* EQNSYM_SIGMA */ + { "THETA", "*H" }, /* EQNSYM_THETA */ + { "UPSILON", "*U" }, /* EQNSYM_UPSILON */ + { "XI", "*C" }, /* EQNSYM_XI */ + { "inter", "ca" }, /* EQNSYM_inter */ + { "union", "cu" }, /* EQNSYM_union */ + { "prod", "product" }, /* EQNSYM_prod */ + { "int", "integral" }, /* EQNSYM_int */ + { "sum", "sum" }, /* EQNSYM_sum */ + { "grad", "gr" }, /* EQNSYM_grad */ + { "del", "gr" }, /* EQNSYM_del */ + { "times", "mu" }, /* EQNSYM_times */ + { "cdot", "pc" }, /* EQNSYM_cdot */ + { "nothing", "&" }, /* EQNSYM_nothing */ + { "approx", "~~" }, /* EQNSYM_approx */ + { "prime", "aq" }, /* EQNSYM_prime */ + { "half", "12" }, /* EQNSYM_half */ + { "partial", "pd" }, /* EQNSYM_partial */ + { "inf", "if" }, /* EQNSYM_inf */ + { ">>", ">>" }, /* EQNSYM_muchgreat */ + { "<<", "<<" }, /* EQNSYM_muchless */ + { "<-", "<-" }, /* EQNSYM_larrow */ + { "->", "->" }, /* EQNSYM_rarrow */ + { "+-", "+-" }, /* EQNSYM_pm */ + { "!=", "!=" }, /* EQNSYM_nequal */ + { "==", "==" }, /* EQNSYM_equiv */ + { "<=", "<=" }, /* EQNSYM_lessequal */ + { ">=", ">=" }, /* EQNSYM_moreequal */ }; -static const struct eqnstr eqnposs[EQNPOS__MAX] = { - { "", 0 }, /* EQNPOS_NONE */ - { "over", 4 }, /* EQNPOS_OVER */ - { "sup", 3 }, /* EQNPOS_SUP */ - { "sub", 3 }, /* EQNPOS_SUB */ - { "to", 2 }, /* EQNPOS_TO */ - { "from", 4 }, /* EQNPOS_FROM */ -}; - -static const struct eqnstr eqnpiles[EQNPILE__MAX] = { - { "", 0 }, /* EQNPILE_NONE */ - { "cpile", 5 }, /* EQNPILE_CPILE */ - { "rpile", 5 }, /* EQNPILE_RPILE */ - { "lpile", 5 }, /* EQNPILE_LPILE */ -}; - -/* ARGSUSED */ enum rofferr -eqn_read(struct eqn_node **epp, int ln, +eqn_read(struct eqn_node **epp, int ln, const char *p, int pos, int *offs) { size_t sz; @@ -134,9 +282,15 @@ eqn_read(struct eqn_node **epp, int ln, * validate the full equation. */ - if (0 == strcmp(p, ".EN")) { - er = eqn_end(ep); - *epp = NULL; + if (0 == strncmp(p, ".EN", 3)) { + er = eqn_end(epp); + p += 3; + while (' ' == *p || '\t' == *p) + p++; + if ('\0' == *p) + return(er); + mandoc_vmsg(MANDOCERR_ARG_SKIP, ep->parse, + ln, pos, "EN %s", p); return(er); } @@ -160,305 +314,54 @@ eqn_read(struct eqn_node **epp, int ln, } struct eqn_node * -eqn_alloc(int pos, int line, struct mparse *parse) +eqn_alloc(const char *name, int pos, int line, struct mparse *parse) { struct eqn_node *p; + size_t sz; + const char *end; p = mandoc_calloc(1, sizeof(struct eqn_node)); + + if (name && '\0' != *name) { + sz = strlen(name); + assert(sz); + do { + sz--; + end = name + (int)sz; + } while (' ' == *end || '\t' == *end); + p->eqn.name = mandoc_strndup(name, sz + 1); + } + p->parse = parse; p->eqn.ln = line; p->eqn.pos = pos; + p->gsize = EQN_DEFSIZE; return(p); } -enum rofferr -eqn_end(struct eqn_node *ep) +/* + * Find the key "key" of the give size within our eqn-defined values. + */ +static struct eqn_def * +eqn_def_find(struct eqn_node *ep, const char *key, size_t sz) { - struct eqn_box *root; - enum eqn_rest c; - - ep->eqn.root = mandoc_calloc(1, sizeof(struct eqn_box)); - - root = ep->eqn.root; - root->type = EQN_ROOT; - - if (0 == ep->sz) - return(ROFF_IGN); - - if (EQN_DESCOPE == (c = eqn_eqn(ep, root))) { - EQN_MSG(MANDOCERR_EQNNSCOPE, ep); - c = EQN_ERR; - } - - return(EQN_EOF == c ? ROFF_EQN : ROFF_IGN); -} - -static enum eqn_rest -eqn_eqn(struct eqn_node *ep, struct eqn_box *last) -{ - struct eqn_box *bp; - enum eqn_rest c; - - bp = eqn_box_alloc(last); - bp->type = EQN_SUBEXPR; - - while (EQN_OK == (c = eqn_box(ep, bp))) - /* Spin! */ ; - - return(c); -} - -static enum eqn_rest -eqn_list(struct eqn_node *ep, struct eqn_box *last) -{ - struct eqn_box *bp; - const char *start; - size_t sz; - enum eqn_rest c; - - bp = eqn_box_alloc(last); - bp->type = EQN_LIST; - - if (NULL == (start = eqn_nexttok(ep, &sz))) { - EQN_MSG(MANDOCERR_EQNEOF, ep); - return(EQN_ERR); - } - if ( ! STRNEQ(start, sz, "{", 1)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(EQN_ERR); - } - - while (EQN_DESCOPE == (c = eqn_eqn(ep, bp))) { - eqn_rewind(ep); - start = eqn_nexttok(ep, &sz); - assert(start); - if ( ! STRNEQ(start, sz, "above", 5)) - break; - bp->last->above = 1; - } - - if (EQN_DESCOPE != c) { - if (EQN_ERR != c) - EQN_MSG(MANDOCERR_EQNSCOPE, ep); - return(EQN_ERR); - } - - eqn_rewind(ep); - start = eqn_nexttok(ep, &sz); - assert(start); - if (STRNEQ(start, sz, "}", 1)) - return(EQN_OK); - - EQN_MSG(MANDOCERR_EQNBADSCOPE, ep); - return(EQN_ERR); -} - -static enum eqn_rest -eqn_box(struct eqn_node *ep, struct eqn_box *last) -{ - size_t sz; - const char *start; - char *left; - enum eqn_rest c; - int i, size; - struct eqn_box *bp; - - if (NULL == (start = eqn_nexttok(ep, &sz))) - return(EQN_EOF); - - if (STRNEQ(start, sz, "}", 1)) - return(EQN_DESCOPE); - else if (STRNEQ(start, sz, "right", 5)) - return(EQN_DESCOPE); - else if (STRNEQ(start, sz, "above", 5)) - return(EQN_DESCOPE); - - for (i = 0; i < (int)EQN__MAX; i++) { - if ( ! EQNSTREQ(&eqnparts[i].str, start, sz)) - continue; - return((*eqnparts[i].fp)(ep) ? EQN_OK : EQN_ERR); - } - - if (STRNEQ(start, sz, "{", 1)) { - if (EQN_DESCOPE != (c = eqn_eqn(ep, last))) { - if (EQN_ERR != c) - EQN_MSG(MANDOCERR_EQNSCOPE, ep); - return(EQN_ERR); - } - eqn_rewind(ep); - start = eqn_nexttok(ep, &sz); - assert(start); - if (STRNEQ(start, sz, "}", 1)) - return(EQN_OK); - EQN_MSG(MANDOCERR_EQNBADSCOPE, ep); - return(EQN_ERR); - } - - for (i = 0; i < (int)EQNPILE__MAX; i++) { - if ( ! EQNSTREQ(&eqnpiles[i], start, sz)) - continue; - if (EQN_OK == (c = eqn_list(ep, last))) - last->last->pile = (enum eqn_pilet)i; - return(c); - } - - if (STRNEQ(start, sz, "left", 4)) { - if (NULL == (start = eqn_nexttok(ep, &sz))) { - EQN_MSG(MANDOCERR_EQNEOF, ep); - return(EQN_ERR); - } - left = mandoc_strndup(start, sz); - if (EQN_DESCOPE != (c = eqn_eqn(ep, last))) - return(c); - assert(last->last); - last->last->left = left; - eqn_rewind(ep); - start = eqn_nexttok(ep, &sz); - assert(start); - if (STRNEQ(start, sz, "right", 5)) - return(EQN_DESCOPE); - if (NULL == (start = eqn_nexttok(ep, &sz))) { - EQN_MSG(MANDOCERR_EQNEOF, ep); - return(EQN_ERR); - } - last->last->right = mandoc_strndup(start, sz); - return(EQN_OK); - } - - for (i = 0; i < (int)EQNPOS__MAX; i++) { - if ( ! EQNSTREQ(&eqnposs[i], start, sz)) - continue; - if (NULL == last->last) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(EQN_ERR); - } - last->last->pos = (enum eqn_post)i; - if (EQN_EOF == (c = eqn_box(ep, last))) { - EQN_MSG(MANDOCERR_EQNEOF, ep); - return(EQN_ERR); - } - return(c); - } - - for (i = 0; i < (int)EQNMARK__MAX; i++) { - if ( ! EQNSTREQ(&eqnmarks[i], start, sz)) - continue; - if (NULL == last->last) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(EQN_ERR); - } - last->last->mark = (enum eqn_markt)i; - if (EQN_EOF == (c = eqn_box(ep, last))) { - EQN_MSG(MANDOCERR_EQNEOF, ep); - return(EQN_ERR); - } - return(c); - } - - for (i = 0; i < (int)EQNFONT__MAX; i++) { - if ( ! EQNSTREQ(&eqnfonts[i], start, sz)) - continue; - if (EQN_EOF == (c = eqn_box(ep, last))) { - EQN_MSG(MANDOCERR_EQNEOF, ep); - return(EQN_ERR); - } else if (EQN_OK == c) - last->last->font = (enum eqn_fontt)i; - return(c); - } - - if (STRNEQ(start, sz, "size", 4)) { - if (NULL == (start = eqn_nexttok(ep, &sz))) { - EQN_MSG(MANDOCERR_EQNEOF, ep); - return(EQN_ERR); - } - size = mandoc_strntoi(start, sz, 10); - if (EQN_EOF == (c = eqn_box(ep, last))) { - EQN_MSG(MANDOCERR_EQNEOF, ep); - return(EQN_ERR); - } else if (EQN_OK != c) - return(c); - last->last->size = size; - } - - bp = eqn_box_alloc(last); - bp->type = EQN_TEXT; - bp->text = mandoc_strndup(start, sz); - return(EQN_OK); -} - -void -eqn_free(struct eqn_node *p) -{ int i; - eqn_box_free(p->eqn.root); + for (i = 0; i < (int)ep->defsz; i++) + if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key, + ep->defs[i].keysz, key, sz)) + return(&ep->defs[i]); - for (i = 0; i < (int)p->defsz; i++) { - free(p->defs[i].key); - free(p->defs[i].val); - } - - free(p->data); - free(p->defs); - free(p); + return(NULL); } -static struct eqn_box * -eqn_box_alloc(struct eqn_box *parent) -{ - struct eqn_box *bp; - - bp = mandoc_calloc(1, sizeof(struct eqn_box)); - bp->parent = parent; - bp->size = EQN_DEFSIZE; - - if (NULL == parent->first) - parent->first = bp; - else - parent->last->next = bp; - - parent->last = bp; - return(bp); -} - -static void -eqn_box_free(struct eqn_box *bp) -{ - - if (bp->first) - eqn_box_free(bp->first); - if (bp->next) - eqn_box_free(bp->next); - - free(bp->text); - free(bp->left); - free(bp->right); - free(bp); -} - +/* + * Get the next token from the input stream using the given quote + * character. + * Optionally make any replacements. + */ static const char * -eqn_nextrawtok(struct eqn_node *ep, size_t *sz) -{ - - return(eqn_next(ep, '"', sz, 0)); -} - -static const char * -eqn_nexttok(struct eqn_node *ep, size_t *sz) -{ - - return(eqn_next(ep, '"', sz, 1)); -} - -static void -eqn_rewind(struct eqn_node *ep) -{ - - ep->cur = ep->rew; -} - -static const char * eqn_next(struct eqn_node *ep, char quote, size_t *sz, int repl) { char *start, *next; @@ -475,7 +378,7 @@ again: /* Prevent self-definitions. */ if (lim >= EQN_NEST_MAX) { - EQN_MSG(MANDOCERR_EQNNEST, ep); + EQN_MSG(MANDOCERR_ROFFLOOP, ep); return(NULL); } @@ -497,7 +400,7 @@ again: if ('{' == *start || '}' == *start) ssz = 1; else - ssz = strcspn(start + 1, " ~\"{}\t") + 1; + ssz = strcspn(start + 1, " ^~\"{}\t") + 1; next = start + (int)ssz; if ('\0' == *next) next = NULL; @@ -510,12 +413,13 @@ again: if (q) ep->cur++; while (' ' == ep->data[(int)ep->cur] || - '\t' == ep->data[(int)ep->cur] || - '~' == ep->data[(int)ep->cur]) + '\t' == ep->data[(int)ep->cur] || + '^' == ep->data[(int)ep->cur] || + '~' == ep->data[(int)ep->cur]) ep->cur++; } else { if (q) - EQN_MSG(MANDOCERR_BADQUOTE, ep); + EQN_MSG(MANDOCERR_ARG_QUOTE, ep); next = strchr(start, '\0'); *sz = (size_t)(next - start); ep->cur += *sz; @@ -537,8 +441,8 @@ again: } diff = def->valsz - *sz; - memmove(start + *sz + diff, start + *sz, - (strlen(start) - *sz) + 1); + memmove(start + *sz + diff, start + *sz, + (strlen(start) - *sz) + 1); memcpy(start, def->val, def->valsz); goto again; } @@ -546,23 +450,154 @@ again: return(start); } +/* + * Get the next delimited token using the default current quote + * character. + */ +static const char * +eqn_nexttok(struct eqn_node *ep, size_t *sz) +{ + + return(eqn_next(ep, '"', sz, 1)); +} + +/* + * Get next token without replacement. + */ +static const char * +eqn_nextrawtok(struct eqn_node *ep, size_t *sz) +{ + + return(eqn_next(ep, '"', sz, 0)); +} + +/* + * Parse a token from the stream of text. + * A token consists of one of the recognised eqn(7) strings. + * Strings are separated by delimiting marks. + * This returns EQN_TOK_EOF when there are no more tokens. + * If the token is an unrecognised string literal, then it returns + * EQN_TOK__MAX and sets the "p" pointer to an allocated, nil-terminated + * string. + * This must be later freed with free(3). + */ +static enum eqn_tok +eqn_tok_parse(struct eqn_node *ep, char **p) +{ + const char *start; + size_t i, sz; + + if (NULL != p) + *p = NULL; + + if (NULL == (start = eqn_nexttok(ep, &sz))) + return(EQN_TOK_EOF); + + for (i = 0; i < EQN_TOK__MAX; i++) { + if (NULL == eqn_toks[i]) + continue; + if (STRNEQ(start, sz, eqn_toks[i], strlen(eqn_toks[i]))) + break; + } + + if (i == EQN_TOK__MAX && NULL != p) + *p = mandoc_strndup(start, sz); + + return(i); +} + +static void +eqn_box_free(struct eqn_box *bp) +{ + + if (bp->first) + eqn_box_free(bp->first); + if (bp->next) + eqn_box_free(bp->next); + + free(bp->text); + free(bp->left); + free(bp->right); + free(bp->top); + free(bp->bottom); + free(bp); +} + +/* + * Allocate a box as the last child of the parent node. + */ +static struct eqn_box * +eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent) +{ + struct eqn_box *bp; + + bp = mandoc_calloc(1, sizeof(struct eqn_box)); + bp->parent = parent; + bp->parent->args++; + bp->expectargs = UINT_MAX; + bp->size = ep->gsize; + + if (NULL != parent->first) { + parent->last->next = bp; + bp->prev = parent->last; + } else + parent->first = bp; + + parent->last = bp; + return(bp); +} + +/* + * Reparent the current last node (of the current parent) under a new + * EQN_SUBEXPR as the first element. + * Then return the new parent. + * The new EQN_SUBEXPR will have a two-child limit. + */ +static struct eqn_box * +eqn_box_makebinary(struct eqn_node *ep, + enum eqn_post pos, struct eqn_box *parent) +{ + struct eqn_box *b, *newb; + + assert(NULL != parent->last); + b = parent->last; + if (parent->last == parent->first) + parent->first = NULL; + parent->args--; + parent->last = b->prev; + b->prev = NULL; + newb = eqn_box_alloc(ep, parent); + newb->pos = pos; + newb->type = EQN_SUBEXPR; + newb->expectargs = 2; + newb->args = 1; + newb->first = newb->last = b; + newb->first->next = NULL; + b->parent = newb; + return(newb); +} + +/* + * Undefine a previously-defined string. + */ static int -eqn_do_set(struct eqn_node *ep) +eqn_undef(struct eqn_node *ep) { const char *start; + struct eqn_def *def; + size_t sz; - if (NULL == (start = eqn_nextrawtok(ep, NULL))) - EQN_MSG(MANDOCERR_EQNARGS, ep); - else if (NULL == (start = eqn_nextrawtok(ep, NULL))) - EQN_MSG(MANDOCERR_EQNARGS, ep); - else - return(1); + if (NULL == (start = eqn_nextrawtok(ep, &sz))) { + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(0); + } else if (NULL != (def = eqn_def_find(ep, start, sz))) + def->keysz = 0; - return(0); + return(1); } static int -eqn_do_define(struct eqn_node *ep) +eqn_def(struct eqn_node *ep) { const char *start; size_t sz; @@ -570,15 +605,14 @@ eqn_do_define(struct eqn_node *ep) int i; if (NULL == (start = eqn_nextrawtok(ep, &sz))) { - EQN_MSG(MANDOCERR_EQNARGS, ep); + EQN_MSG(MANDOCERR_EQNEOF, ep); return(0); } - /* - * Search for a key that already exists. + /* + * Search for a key that already exists. * Create a new key if none is found. */ - if (NULL == (def = eqn_def_find(ep, start, sz))) { /* Find holes in string array. */ for (i = 0; i < (int)ep->defsz; i++) @@ -587,15 +621,14 @@ eqn_do_define(struct eqn_node *ep) if (i == (int)ep->defsz) { ep->defsz++; - ep->defs = mandoc_realloc - (ep->defs, ep->defsz * - sizeof(struct eqn_def)); + ep->defs = mandoc_reallocarray(ep->defs, + ep->defsz, sizeof(struct eqn_def)); ep->defs[i].key = ep->defs[i].val = NULL; } ep->defs[i].keysz = sz; - ep->defs[i].key = mandoc_realloc - (ep->defs[i].key, sz + 1); + ep->defs[i].key = mandoc_realloc( + ep->defs[i].key, sz + 1); memcpy(ep->defs[i].key, start, sz); ep->defs[i].key[(int)sz] = '\0'; @@ -605,8 +638,8 @@ eqn_do_define(struct eqn_node *ep) start = eqn_next(ep, ep->data[(int)ep->cur], &sz, 0); if (NULL == start) { - EQN_MSG(MANDOCERR_EQNARGS, ep); - return(0); + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(-1); } def->valsz = sz; @@ -616,31 +649,449 @@ eqn_do_define(struct eqn_node *ep) return(1); } +/* + * Recursively parse an eqn(7) expression. + */ static int -eqn_do_undef(struct eqn_node *ep) +eqn_parse(struct eqn_node *ep, struct eqn_box *parent) { + char *p; + enum eqn_tok tok; + enum eqn_post pos; + struct eqn_box *cur; + int rc, size; + size_t i, sz; + char sym[64]; const char *start; - struct eqn_def *def; - size_t sz; - if (NULL == (start = eqn_nextrawtok(ep, &sz))) { - EQN_MSG(MANDOCERR_EQNARGS, ep); + assert(NULL != parent); +again: + + switch ((tok = eqn_tok_parse(ep, &p))) { + case (EQN_TOK_UNDEF): + if ((rc = eqn_undef(ep)) <= 0) + return(rc); + break; + case (EQN_TOK_NDEFINE): + case (EQN_TOK_DEFINE): + if ((rc = eqn_def(ep)) <= 0) + return(rc); + break; + case (EQN_TOK_TDEFINE): + if (NULL == eqn_nextrawtok(ep, NULL)) + EQN_MSG(MANDOCERR_EQNEOF, ep); + else if (NULL == eqn_next(ep, + ep->data[(int)ep->cur], NULL, 0)) + EQN_MSG(MANDOCERR_EQNEOF, ep); + break; + case (EQN_TOK_DELIM): + case (EQN_TOK_GFONT): + if (NULL == eqn_nextrawtok(ep, NULL)) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(-1); + } + break; + case (EQN_TOK_MARK): + case (EQN_TOK_LINEUP): + /* Ignore these. */ + break; + case (EQN_TOK_DYAD): + case (EQN_TOK_VEC): + case (EQN_TOK_UNDER): + case (EQN_TOK_BAR): + case (EQN_TOK_TILDE): + case (EQN_TOK_HAT): + case (EQN_TOK_DOT): + case (EQN_TOK_DOTDOT): + if (NULL == parent->last) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(-1); + } + parent = eqn_box_makebinary(ep, EQNPOS_NONE, parent); + parent->type = EQN_LISTONE; + parent->expectargs = 1; + switch (tok) { + case (EQN_TOK_DOTDOT): + strlcpy(sym, "\\[ad]", sizeof(sym)); + break; + case (EQN_TOK_VEC): + strlcpy(sym, "\\[->]", sizeof(sym)); + break; + case (EQN_TOK_DYAD): + strlcpy(sym, "\\[<>]", sizeof(sym)); + break; + case (EQN_TOK_TILDE): + strlcpy(sym, "\\[a~]", sizeof(sym)); + break; + case (EQN_TOK_UNDER): + strlcpy(sym, "\\[ul]", sizeof(sym)); + break; + case (EQN_TOK_BAR): + strlcpy(sym, "\\[rl]", sizeof(sym)); + break; + case (EQN_TOK_DOT): + strlcpy(sym, "\\[a.]", sizeof(sym)); + break; + case (EQN_TOK_HAT): + strlcpy(sym, "\\[ha]", sizeof(sym)); + break; + default: + abort(); + } + + switch (tok) { + case (EQN_TOK_DOTDOT): + case (EQN_TOK_VEC): + case (EQN_TOK_DYAD): + case (EQN_TOK_TILDE): + case (EQN_TOK_BAR): + case (EQN_TOK_DOT): + case (EQN_TOK_HAT): + parent->top = mandoc_strdup(sym); + break; + case (EQN_TOK_UNDER): + parent->bottom = mandoc_strdup(sym); + break; + default: + abort(); + } + parent = parent->parent; + break; + case (EQN_TOK_FWD): + case (EQN_TOK_BACK): + case (EQN_TOK_DOWN): + case (EQN_TOK_UP): + tok = eqn_tok_parse(ep, NULL); + if (EQN_TOK__MAX != tok) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(-1); + } + break; + case (EQN_TOK_FAT): + case (EQN_TOK_ROMAN): + case (EQN_TOK_ITALIC): + case (EQN_TOK_BOLD): + while (parent->args == parent->expectargs) + if (NULL == (parent = parent->parent)) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(-1); + } + /* + * These values apply to the next word or sequence of + * words; thus, we mark that we'll have a child with + * exactly one of those. + */ + parent = eqn_box_alloc(ep, parent); + parent->type = EQN_LISTONE; + parent->expectargs = 1; + switch (tok) { + case (EQN_TOK_FAT): + parent->font = EQNFONT_FAT; + break; + case (EQN_TOK_ROMAN): + parent->font = EQNFONT_ROMAN; + break; + case (EQN_TOK_ITALIC): + parent->font = EQNFONT_ITALIC; + break; + case (EQN_TOK_BOLD): + parent->font = EQNFONT_BOLD; + break; + default: + abort(); + } + break; + case (EQN_TOK_SIZE): + case (EQN_TOK_GSIZE): + /* Accept two values: integral size and a single. */ + if (NULL == (start = eqn_nexttok(ep, &sz))) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(-1); + } + size = mandoc_strntoi(start, sz, 10); + if (-1 == size) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(-1); + } + if (EQN_TOK_GSIZE == tok) { + ep->gsize = size; + break; + } + parent = eqn_box_alloc(ep, parent); + parent->type = EQN_LISTONE; + parent->expectargs = 1; + parent->size = size; + break; + case (EQN_TOK_FROM): + case (EQN_TOK_TO): + case (EQN_TOK_SUB): + case (EQN_TOK_SUP): + /* + * We have a left-right-associative expression. + * Repivot under a positional node, open a child scope + * and keep on reading. + */ + if (NULL == parent->last) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(-1); + } + /* Handle the "subsup" and "fromto" positions. */ + if (EQN_TOK_SUP == tok && parent->pos == EQNPOS_SUB) { + parent->expectargs = 3; + parent->pos = EQNPOS_SUBSUP; + break; + } + if (EQN_TOK_TO == tok && parent->pos == EQNPOS_FROM) { + parent->expectargs = 3; + parent->pos = EQNPOS_FROMTO; + break; + } + switch (tok) { + case (EQN_TOK_FROM): + pos = EQNPOS_FROM; + break; + case (EQN_TOK_TO): + pos = EQNPOS_TO; + break; + case (EQN_TOK_SUP): + pos = EQNPOS_SUP; + break; + case (EQN_TOK_SUB): + pos = EQNPOS_SUB; + break; + default: + abort(); + } + parent = eqn_box_makebinary(ep, pos, parent); + break; + case (EQN_TOK_SQRT): + while (parent->args == parent->expectargs) + if (NULL == (parent = parent->parent)) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(-1); + } + /* + * Accept a left-right-associative set of arguments just + * like sub and sup and friends but without rebalancing + * under a pivot. + */ + parent = eqn_box_alloc(ep, parent); + parent->type = EQN_SUBEXPR; + parent->pos = EQNPOS_SQRT; + parent->expectargs = 1; + break; + case (EQN_TOK_OVER): + /* + * We have a right-left-associative fraction. + * Close out anything that's currently open, then + * rebalance and continue reading. + */ + if (NULL == parent->last) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(-1); + } + while (EQN_SUBEXPR == parent->type) + if (NULL == (parent = parent->parent)) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(-1); + } + parent = eqn_box_makebinary(ep, EQNPOS_OVER, parent); + break; + case (EQN_TOK_RIGHT): + case (EQN_TOK_BRACE_CLOSE): + /* + * Close out the existing brace. + * FIXME: this is a shitty sentinel: we should really + * have a native EQN_BRACE type or whatnot. + */ + while (parent->type != EQN_LIST) + if (NULL == (parent = parent->parent)) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(-1); + } + if (EQN_TOK_RIGHT == tok) { + if (NULL == parent->left) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(-1); + } + if (NULL == (start = eqn_nexttok(ep, &sz))) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(-1); + } + /* Handling depends on right/left. */ + if (STRNEQ(start, sz, "ceiling", 7)) { + strlcpy(sym, "\\[rc]", sizeof(sym)); + parent->right = mandoc_strdup(sym); + } else if (STRNEQ(start, sz, "floor", 5)) { + strlcpy(sym, "\\[rf]", sizeof(sym)); + parent->right = mandoc_strdup(sym); + } else + parent->right = mandoc_strndup(start, sz); + } + if (NULL == (parent = parent->parent)) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(-1); + } + if (EQN_TOK_BRACE_CLOSE == tok && parent && + (parent->type == EQN_PILE || + parent->type == EQN_MATRIX)) + parent = parent->parent; + /* Close out any "singleton" lists. */ + while (parent->type == EQN_LISTONE && + parent->args == parent->expectargs) + if (NULL == (parent = parent->parent)) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(-1); + } + break; + case (EQN_TOK_BRACE_OPEN): + case (EQN_TOK_LEFT): + /* + * If we already have something in the stack and we're + * in an expression, then rewind til we're not any more + * (just like with the text node). + */ + while (parent->args == parent->expectargs) + if (NULL == (parent = parent->parent)) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(-1); + } + parent = eqn_box_alloc(ep, parent); + parent->type = EQN_LIST; + if (EQN_TOK_LEFT == tok) { + if (NULL == (start = eqn_nexttok(ep, &sz))) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(-1); + } + /* Handling depends on right/left. */ + if (STRNEQ(start, sz, "ceiling", 7)) { + strlcpy(sym, "\\[lc]", sizeof(sym)); + parent->left = mandoc_strdup(sym); + } else if (STRNEQ(start, sz, "floor", 5)) { + strlcpy(sym, "\\[lf]", sizeof(sym)); + parent->left = mandoc_strdup(sym); + } else + parent->left = mandoc_strndup(start, sz); + } + break; + case (EQN_TOK_PILE): + case (EQN_TOK_LPILE): + case (EQN_TOK_RPILE): + case (EQN_TOK_CPILE): + case (EQN_TOK_CCOL): + case (EQN_TOK_LCOL): + case (EQN_TOK_RCOL): + while (parent->args == parent->expectargs) + if (NULL == (parent = parent->parent)) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(-1); + } + if (EQN_TOK_BRACE_OPEN != eqn_tok_parse(ep, NULL)) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(-1); + } + parent = eqn_box_alloc(ep, parent); + parent->type = EQN_PILE; + parent = eqn_box_alloc(ep, parent); + parent->type = EQN_LIST; + break; + case (EQN_TOK_ABOVE): + while (parent->type != EQN_PILE) + if (NULL == (parent = parent->parent)) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(-1); + } + parent = eqn_box_alloc(ep, parent); + parent->type = EQN_LIST; + break; + case (EQN_TOK_MATRIX): + while (parent->args == parent->expectargs) + if (NULL == (parent = parent->parent)) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(-1); + } + if (EQN_TOK_BRACE_OPEN != eqn_tok_parse(ep, NULL)) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(-1); + } + parent = eqn_box_alloc(ep, parent); + parent->type = EQN_MATRIX; + parent = eqn_box_alloc(ep, parent); + parent->type = EQN_LIST; + break; + case (EQN_TOK_EOF): + /* + * End of file! + * TODO: make sure we're not in an open subexpression. + */ return(0); - } else if (NULL != (def = eqn_def_find(ep, start, sz))) - def->keysz = 0; + default: + assert(tok == EQN_TOK__MAX); + assert(NULL != p); + /* + * If we already have something in the stack and we're + * in an expression, then rewind til we're not any more. + */ + while (parent->args == parent->expectargs) + if (NULL == (parent = parent->parent)) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + free(p); + return(-1); + } + cur = eqn_box_alloc(ep, parent); + cur->type = EQN_TEXT; + for (i = 0; i < EQNSYM__MAX; i++) + if (0 == strcmp(eqnsyms[i].str, p)) { + (void)snprintf(sym, sizeof(sym), + "\\[%s]", eqnsyms[i].sym); + cur->text = mandoc_strdup(sym); + free(p); + break; + } - return(1); + if (i == EQNSYM__MAX) + cur->text = p; + /* + * Post-process list status. + */ + while (parent->type == EQN_LISTONE && + parent->args == parent->expectargs) + if (NULL == (parent = parent->parent)) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(-1); + } + break; + } + goto again; } -static struct eqn_def * -eqn_def_find(struct eqn_node *ep, const char *key, size_t sz) +enum rofferr +eqn_end(struct eqn_node **epp) { + struct eqn_node *ep; + + ep = *epp; + *epp = NULL; + + ep->eqn.root = mandoc_calloc(1, sizeof(struct eqn_box)); + ep->eqn.root->expectargs = UINT_MAX; + return(0 == eqn_parse(ep, ep->eqn.root) ? ROFF_EQN : ROFF_IGN); +} + +void +eqn_free(struct eqn_node *p) +{ int i; - for (i = 0; i < (int)ep->defsz; i++) - if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key, - ep->defs[i].keysz, key, sz)) - return(&ep->defs[i]); + eqn_box_free(p->eqn.root); - return(NULL); + for (i = 0; i < (int)p->defsz; i++) { + free(p->defs[i].key); + free(p->defs[i].val); + } + + free(p->eqn.name); + free(p->data); + free(p->defs); + free(p); }