=================================================================== RCS file: /cvs/mandoc/Attic/macro.c,v retrieving revision 1.5 retrieving revision 1.49 diff -u -p -r1.5 -r1.49 --- mandoc/Attic/macro.c 2008/12/17 17:18:38 1.5 +++ mandoc/Attic/macro.c 2009/01/22 14:56:21 1.49 @@ -1,4 +1,4 @@ -/* $Id: macro.c,v 1.5 2008/12/17 17:18:38 kristaps Exp $ */ +/* $Id: macro.c,v 1.49 2009/01/22 14:56:21 kristaps Exp $ */ /* * Copyright (c) 2008 Kristaps Dzonsons * @@ -21,421 +21,1276 @@ #include #include #include +#ifdef __linux__ +#include +#endif +/* + * This has scanning/parsing routines, each of which extract a macro and + * its arguments and parameters, then know how to progress to the next + * macro. Macros are parsed according as follows: + * + * ELEMENT: TEXT | epsilon + * BLOCK: HEAD PUNCT BODY PUNCT BLOCK_TAIL PUNCT + * BLOCK_TAIL: TAIL | epsilon + * HEAD: ELEMENT | TEXT | BLOCK | epsilon + * BODY: ELEMENT | TEXT | BLOCK | epsilon + * TAIL: TEXT | epsilon + * PUNCT: TEXT (delimiters) | epsilon + * + * These are arranged into a parse tree, an example of which follows: + * + * ROOT + * BLOCK (.Sh) + * HEAD + * TEXT (`NAME') + * BODY + * ELEMENT (.Nm) + * TEXT (`mdocml') + * ELEMENT (.Nd) + * TEXT (`mdoc macro compiler') + * BLOCK (.Op) + * HEAD + * ELEMENT (.Fl) + * TEXT (`v') + * BLOCK (.Op) + * HEAD + * ELEMENT (.Fl) + * TEXT (`v') + * ELEMENT (.Fl) + * TEXT (`W') + * ELEMENT (.Ns) + * ELEMENT (.Ar) + * TEXT (`err...') + * + * These types are always per-line except for block bodies, which may + * span multiple lines. Macros are assigned a parsing routine, which + * corresponds to the type, in the mdoc_macros table. + * + * Note that types are general: there can be several parsing routines + * corresponding to a single type. The macro_text function, for + * example, parses an ELEMENT type (see the function definition for + * details) that may be interrupted by further macros; the + * macro_constant function, on the other hand, parses an ELEMENT type + * spanning a single line. + */ + #include "private.h" -#define _CC(p) ((const char **)p) +#define REWIND_REWIND (1 << 0) +#define REWIND_NOHALT (1 << 1) +#define REWIND_HALT (1 << 2) -static int xstrlcat(char *, const char *, size_t); -static int xstrlcpy(char *, const char *, size_t); -static int xstrcmp(const char *, const char *); -static int append_text(struct mdoc *, int, - int, int, char *[]); -static int append_scoped(struct mdoc *, int, - int, int, char *[]); -static int args_next(struct mdoc *, int, - int *, char *, char **); +static int rewind_dohalt(int, enum mdoc_type, + const struct mdoc_node *); +static int rewind_alt(int); +static int rewind_dobreak(int, const struct mdoc_node *); +static int rewind_elem(struct mdoc *, int); +static int rewind_impblock(struct mdoc *, int, int, int); +static int rewind_expblock(struct mdoc *, int, int, int); +static int rewind_subblock(enum mdoc_type, + struct mdoc *, int, int, int); +static int rewind_last(struct mdoc *, struct mdoc_node *); +static int append_delims(struct mdoc *, int, int *, char *); +static int lookup(struct mdoc *, int, int, int, const char *); +static int pwarn(struct mdoc *, int, int, int); +static int perr(struct mdoc *, int, int, int); +#define WMACPARM (1) +#define WOBS (2) +#define ENOCTX (1) +#define ENOPARMS (2) +#define EARGVLIM (3) + + static int -args_next(struct mdoc *mdoc, int tok, - int *pos, char *buf, char **v) +perr(struct mdoc *mdoc, int line, int pos, int type) { + int c; - if (0 == buf[*pos]) - return(0); + switch (type) { + case (ENOCTX): + c = mdoc_perr(mdoc, line, pos, + "closing macro has prior context"); + break; + case (ENOPARMS): + c = mdoc_perr(mdoc, line, pos, + "macro doesn't expect parameters"); + break; + case (EARGVLIM): + c = mdoc_perr(mdoc, line, pos, + "argument hard-limit %d reached", + MDOC_LINEARG_MAX); + break; + default: + abort(); + /* NOTREACHED */ + } + return(c); +} - assert( ! isspace(buf[*pos])); +static int +pwarn(struct mdoc *mdoc, int line, int pos, int type) +{ + int c; - if ('\"' == buf[*pos]) { - (void)mdoc_err(mdoc, tok, *pos, ERR_SYNTAX_QUOTE); - return(-1); + switch (type) { + case (WMACPARM): + c = mdoc_pwarn(mdoc, line, pos, WARN_SYNTAX, + "macro-like parameter"); + break; + case (WOBS): + c = mdoc_pwarn(mdoc, line, pos, WARN_SYNTAX, + "macro is marked obsolete"); + break; + default: + abort(); + /* NOTREACHED */ } + return(c); +} - *v = &buf[*pos]; - /* Scan ahead to end of token. */ +static int +lookup(struct mdoc *mdoc, int line, int pos, int from, const char *p) +{ + int res; - while (buf[*pos] && ! isspace(buf[*pos])) - (*pos)++; - - if (buf[*pos] && buf[*pos + 1] && '\\' == buf[*pos]) { - (void)mdoc_err(mdoc, tok, *pos, ERR_SYNTAX_WS); + res = mdoc_find(mdoc, p); + if (MDOC_PARSED & mdoc_macros[from].flags) + return(res); + if (MDOC_MAX == res) + return(res); + if ( ! pwarn(mdoc, line, pos, WMACPARM)) return(-1); - } + return(MDOC_MAX); +} - if (0 == buf[*pos]) - return(1); - /* Scan ahead over trailing whitespace. */ +static int +rewind_last(struct mdoc *mdoc, struct mdoc_node *to) +{ - buf[(*pos)++] = 0; - while (buf[*pos] && isspace(buf[*pos])) - (*pos)++; + assert(to); + mdoc->next = MDOC_NEXT_SIBLING; - if (0 == buf[*pos]) - if ( ! mdoc_warn(mdoc, tok, *pos, WARN_SYNTAX_WS_EOLN)) - return(-1); + /* LINTED */ + while (mdoc->last != to) { + if ( ! mdoc_valid_post(mdoc)) + return(0); + if ( ! mdoc_action_post(mdoc)) + return(0); + mdoc->last = mdoc->last->parent; + assert(mdoc->last); + } - return(1); + if ( ! mdoc_valid_post(mdoc)) + return(0); + return(mdoc_action_post(mdoc)); } static int -append_scoped(struct mdoc *mdoc, int tok, - int pos, int sz, char *args[]) +rewind_alt(int tok) { - enum mdoc_sec sec; - - if (0 == sz) - return(mdoc_err(mdoc, tok, pos, ERR_ARGS_GE1)); - switch (tok) { - /* ======= ADD MORE MACRO CHECKS BELOW. ======= */ - case (MDOC_Sh): - sec = mdoc_atosec((size_t)sz, _CC(args)); - if (SEC_CUSTOM != sec && sec < mdoc->sec_lastn) - if ( ! mdoc_warn(mdoc, tok, pos, WARN_SEC_OO)) - return(0); - - if (SEC_BODY == mdoc->sec_last && SEC_NAME != sec) - return(mdoc_err(mdoc, tok, pos, ERR_SEC_NAME)); - - if (SEC_CUSTOM != sec) - mdoc->sec_lastn = sec; - mdoc->sec_last = sec; - break; - case (MDOC_Ss): - break; - /* ======= ADD MORE MACRO CHECKS ABOVE. ======= */ + case (MDOC_Ac): + return(MDOC_Ao); + case (MDOC_Bc): + return(MDOC_Bo); + case (MDOC_Dc): + return(MDOC_Do); + case (MDOC_Ec): + return(MDOC_Eo); + case (MDOC_Ed): + return(MDOC_Bd); + case (MDOC_Ef): + return(MDOC_Bf); + case (MDOC_Ek): + return(MDOC_Bk); + case (MDOC_El): + return(MDOC_Bl); + case (MDOC_Fc): + return(MDOC_Fo); + case (MDOC_Oc): + return(MDOC_Oo); + case (MDOC_Pc): + return(MDOC_Po); + case (MDOC_Qc): + return(MDOC_Qo); + case (MDOC_Re): + return(MDOC_Rs); + case (MDOC_Sc): + return(MDOC_So); + case (MDOC_Xc): + return(MDOC_Xo); default: - abort(); - /* NOTREACHED */ + break; } - - assert(sz >= 0); - args[sz] = NULL; - mdoc_block_alloc(mdoc, pos, tok, 0, NULL); - mdoc_head_alloc(mdoc, pos, tok, (size_t)sz, _CC(args)); - mdoc_body_alloc(mdoc, pos, tok); - return(1); + abort(); + /* NOTREACHED */ } static int -append_text(struct mdoc *mdoc, int tok, - int pos, int sz, char *args[]) +rewind_dohalt(int tok, enum mdoc_type type, const struct mdoc_node *p) { - assert(sz >= 0); - args[sz] = NULL; + if (MDOC_ROOT == p->type) + return(REWIND_HALT); + if (MDOC_VALID & p->flags) + return(REWIND_NOHALT); switch (tok) { - /* ======= ADD MORE MACRO CHECKS BELOW. ======= */ - case (MDOC_Ft): + /* One-liner implicit-scope. */ + case (MDOC_Aq): /* FALLTHROUGH */ - case (MDOC_Li): + case (MDOC_Bq): /* FALLTHROUGH */ - case (MDOC_Ms): + case (MDOC_D1): /* FALLTHROUGH */ - case (MDOC_Pa): + case (MDOC_Dl): /* FALLTHROUGH */ - case (MDOC_Tn): - if (0 < sz) - break; - if ( ! mdoc_warn(mdoc, tok, pos, WARN_ARGS_GE1)) - return(0); + case (MDOC_Dq): + /* FALLTHROUGH */ + case (MDOC_Op): + /* FALLTHROUGH */ + case (MDOC_Pq): + /* FALLTHROUGH */ + case (MDOC_Ql): + /* FALLTHROUGH */ + case (MDOC_Qq): + /* FALLTHROUGH */ + case (MDOC_Sq): + assert(MDOC_HEAD != type); + assert(MDOC_TAIL != type); + if (type == p->type && tok == p->tok) + return(REWIND_REWIND); break; - case (MDOC_Ar): + + /* Multi-line implicit-scope. */ + case (MDOC_It): + assert(MDOC_TAIL != type); + if (type == p->type && tok == p->tok) + return(REWIND_REWIND); + if (MDOC_BODY == p->type && MDOC_Bl == p->tok) + return(REWIND_HALT); + break; + case (MDOC_Sh): + if (type == p->type && tok == p->tok) + return(REWIND_REWIND); + break; + case (MDOC_Ss): + assert(MDOC_TAIL != type); + if (type == p->type && tok == p->tok) + return(REWIND_REWIND); + if (MDOC_BODY == p->type && MDOC_Sh == p->tok) + return(REWIND_HALT); + break; + + /* Multi-line explicit scope start. */ + case (MDOC_Ao): /* FALLTHROUGH */ - case (MDOC_Cm): + case (MDOC_Bd): /* FALLTHROUGH */ - case (MDOC_Fl): + case (MDOC_Bf): + /* FALLTHROUGH */ + case (MDOC_Bk): + /* FALLTHROUGH */ + case (MDOC_Bl): + /* FALLTHROUGH */ + case (MDOC_Bo): + /* FALLTHROUGH */ + case (MDOC_Do): + /* FALLTHROUGH */ + case (MDOC_Eo): + /* FALLTHROUGH */ + case (MDOC_Fo): + /* FALLTHROUGH */ + case (MDOC_Oo): + /* FALLTHROUGH */ + case (MDOC_Po): + /* FALLTHROUGH */ + case (MDOC_Qo): + /* FALLTHROUGH */ + case (MDOC_Rs): + /* FALLTHROUGH */ + case (MDOC_So): + /* FALLTHROUGH */ + case (MDOC_Xo): + if (type == p->type && tok == p->tok) + return(REWIND_REWIND); break; - case (MDOC_Ad): + + /* Multi-line explicit scope close. */ + case (MDOC_Ac): /* FALLTHROUGH */ - case (MDOC_Em): + case (MDOC_Bc): /* FALLTHROUGH */ - case (MDOC_Er): + case (MDOC_Dc): /* FALLTHROUGH */ - case (MDOC_Ev): + case (MDOC_Ec): /* FALLTHROUGH */ - case (MDOC_Fa): + case (MDOC_Ed): /* FALLTHROUGH */ - case (MDOC_Dv): + case (MDOC_Ek): /* FALLTHROUGH */ - case (MDOC_Ic): + case (MDOC_El): /* FALLTHROUGH */ - case (MDOC_Va): + case (MDOC_Fc): /* FALLTHROUGH */ - case (MDOC_Vt): - if (0 < sz) - break; - return(mdoc_err(mdoc, tok, pos, ERR_ARGS_GE1)); - /* ======= ADD MORE MACRO CHECKS ABOVE. ======= */ + case (MDOC_Ef): + /* FALLTHROUGH */ + case (MDOC_Oc): + /* FALLTHROUGH */ + case (MDOC_Pc): + /* FALLTHROUGH */ + case (MDOC_Qc): + /* FALLTHROUGH */ + case (MDOC_Re): + /* FALLTHROUGH */ + case (MDOC_Sc): + /* FALLTHROUGH */ + case (MDOC_Xc): + if (type == p->type && rewind_alt(tok) == p->tok) + return(REWIND_REWIND); + break; default: abort(); /* NOTREACHED */ } - mdoc_elem_alloc(mdoc, pos, tok, 0, - NULL, (size_t)sz, _CC(args)); + return(REWIND_NOHALT); +} + + +static int +rewind_dobreak(int tok, const struct mdoc_node *p) +{ + + assert(MDOC_ROOT != p->type); + if (MDOC_ELEM == p->type) + return(1); + if (MDOC_TEXT == p->type) + return(1); + if (MDOC_VALID & p->flags) + return(1); + + switch (tok) { + /* Implicit rules. */ + case (MDOC_It): + return(MDOC_It == p->tok); + case (MDOC_Ss): + return(MDOC_Ss == p->tok); + case (MDOC_Sh): + if (MDOC_Ss == p->tok) + return(1); + return(MDOC_Sh == p->tok); + + /* Extra scope rules. */ + case (MDOC_El): + if (MDOC_It == p->tok) + return(1); + break; + default: + break; + } + + if (MDOC_EXPLICIT & mdoc_macros[tok].flags) + return(p->tok == rewind_alt(tok)); + else if (MDOC_BLOCK == p->type) + return(1); + + return(tok == p->tok); +} + + +static int +rewind_elem(struct mdoc *mdoc, int tok) +{ + struct mdoc_node *n; + + n = mdoc->last; + if (MDOC_ELEM != n->type) + n = n->parent; + assert(MDOC_ELEM == n->type); + assert(tok == n->tok); + + return(rewind_last(mdoc, n)); +} + + +static int +rewind_subblock(enum mdoc_type type, struct mdoc *mdoc, + int tok, int line, int ppos) +{ + struct mdoc_node *n; + int c; + + /* LINTED */ + for (n = mdoc->last; n; n = n->parent) { + c = rewind_dohalt(tok, type, n); + if (REWIND_HALT == c) + return(1); + if (REWIND_REWIND == c) + break; + else if (rewind_dobreak(tok, n)) + continue; + return(mdoc_perr(mdoc, line, ppos, "scope breaks prior %s", mdoc_node2a(n))); + } + + assert(n); + return(rewind_last(mdoc, n)); +} + + +static int +rewind_expblock(struct mdoc *mdoc, int tok, int line, int ppos) +{ + struct mdoc_node *n; + int c; + + /* LINTED */ + for (n = mdoc->last; n; n = n->parent) { + c = rewind_dohalt(tok, MDOC_BLOCK, n); + if (REWIND_HALT == c) + return(perr(mdoc, line, ppos, ENOCTX)); + if (REWIND_REWIND == c) + break; + else if (rewind_dobreak(tok, n)) + continue; + return(mdoc_perr(mdoc, line, ppos, "scope breaks prior %s", mdoc_node2a(n))); + } + + assert(n); + return(rewind_last(mdoc, n)); +} + + +static int +rewind_impblock(struct mdoc *mdoc, int tok, int line, int ppos) +{ + struct mdoc_node *n; + int c; + + /* LINTED */ + for (n = mdoc->last; n; n = n->parent) { + c = rewind_dohalt(tok, MDOC_BLOCK, n); + if (REWIND_HALT == c) + return(1); + else if (REWIND_REWIND == c) + break; + else if (rewind_dobreak(tok, n)) + continue; + return(mdoc_perr(mdoc, line, ppos, "scope breaks prior %s", mdoc_node2a(n))); + } + + assert(n); + return(rewind_last(mdoc, n)); +} + + +static int +append_delims(struct mdoc *mdoc, int line, int *pos, char *buf) +{ + int c, lastarg; + char *p; + + if (0 == buf[*pos]) + return(1); + + for (;;) { + lastarg = *pos; + c = mdoc_args(mdoc, line, pos, buf, 0, &p); + assert(ARGS_PHRASE != c); + + if (ARGS_ERROR == c) + return(0); + else if (ARGS_EOLN == c) + break; + assert(mdoc_isdelim(p)); + if ( ! mdoc_word_alloc(mdoc, line, lastarg, p)) + return(0); + mdoc->next = MDOC_NEXT_SIBLING; + } + return(1); } +/* + * Close out an explicit scope. This optionally parses a TAIL type with + * a set number of TEXT children. + */ int -macro_text(MACRO_PROT_ARGS) +macro_scoped_close(MACRO_PROT_ARGS) { - int lastarg, c, lasttok, lastpunct, j; - char *args[MDOC_LINEARG_MAX], *p; + int tt, j, c, lastarg, maxargs, flushed; + char *p; - lasttok = ppos; - lastpunct = 0; - j = 0; + switch (tok) { + case (MDOC_Ec): + maxargs = 1; + break; + default: + maxargs = 0; + break; + } - if (SEC_PROLOGUE == mdoc->sec_lastn) - return(mdoc_err(mdoc, tok, ppos, ERR_SEC_PROLOGUE)); + tt = rewind_alt(tok); -again: + mdoc_msg(mdoc, "parse: %s closing %s", + mdoc_macronames[tok], mdoc_macronames[tt]); - lastarg = *pos; - c = args_next(mdoc, tok, pos, buf, &args[j]); - - if (-1 == c) + if ( ! (MDOC_CALLABLE & mdoc_macros[tok].flags)) { + if (0 == buf[*pos]) { + if ( ! rewind_subblock(MDOC_BODY, mdoc, tok, line, ppos)) + return(0); + return(rewind_expblock(mdoc, tok, line, ppos)); + } + return(perr(mdoc, line, ppos, ENOPARMS)); + } + + if ( ! rewind_subblock(MDOC_BODY, mdoc, tok, line, ppos)) return(0); - if (0 == c && ! lastpunct) - return(append_text(mdoc, tok, lasttok, j, args)); - else if (0 == c) - return(1); - /* Command found. */ + lastarg = ppos; + flushed = 0; - if (MDOC_MAX != (c = mdoc_find(mdoc, args[j]))) { - if ( ! lastpunct) - if ( ! append_text(mdoc, tok, lasttok, j, args)) + if (maxargs > 0) { + if ( ! mdoc_tail_alloc(mdoc, line, ppos, tt)) + return(0); + mdoc->next = MDOC_NEXT_CHILD; + } + + for (j = 0; /* No sentinel. */; j++) { + lastarg = *pos; + + if (j == maxargs && ! flushed) { + if ( ! rewind_expblock(mdoc, tok, line, ppos)) return(0); - return(mdoc_macro(mdoc, c, lastarg, pos, buf)); + flushed = 1; + } + + c = mdoc_args(mdoc, line, pos, buf, tok, &p); + assert(ARGS_PHRASE != c); + + if (ARGS_ERROR == c) + return(0); + if (ARGS_PUNCT == c) + break; + if (ARGS_EOLN == c) + break; + + if (-1 == (c = lookup(mdoc, line, lastarg, tok, p))) + return(0); + else if (MDOC_MAX != c) { + if ( ! flushed) { + if ( ! rewind_expblock(mdoc, tok, line, ppos)) + return(0); + flushed = 1; + } + if ( ! mdoc_macro(mdoc, c, line, lastarg, pos, buf)) + return(0); + break; + } + + if ( ! mdoc_word_alloc(mdoc, line, lastarg, p)) + return(0); + mdoc->next = MDOC_NEXT_SIBLING; } - /* Word found. */ + if ( ! flushed && ! rewind_expblock(mdoc, tok, line, ppos)) + return(0); - if ( ! mdoc_isdelim(args[j])) { - j++; - goto again; + if (ppos > 1) + return(1); + return(append_delims(mdoc, line, pos, buf)); +} + + +/* + * A general text macro. This is a complex case because of punctuation. + * If a text macro is followed by words, then punctuation, the macro is + * "stopped" and "reopened" following the punctuation. Thus, the + * following arises: + * + * .Fl a ; b + * + * ELEMENT (.Fl) + * TEXT (`a') + * TEXT (`;') + * ELEMENT (.Fl) + * TEXT (`b') + * + * This must handle the following situations: + * + * .Fl Ar b ; ; + * + * ELEMENT (.Fl) + * ELEMENT (.Ar) + * TEXT (`b') + * TEXT (`;') + * TEXT (`;') + */ +int +macro_text(MACRO_PROT_ARGS) +{ + int la, lastpunct, c, w, argc; + struct mdoc_arg argv[MDOC_LINEARG_MAX]; + char *p; + + la = ppos; + lastpunct = 0; + + for (argc = 0; argc < MDOC_LINEARG_MAX; argc++) { + la = *pos; + c = mdoc_argv(mdoc, line, tok, &argv[argc], pos, buf); + if (ARGV_EOLN == c) + break; + if (ARGV_WORD == c) { + *pos = la; + break; + } else if (ARGV_ARG == c) + continue; + + mdoc_argv_free(argc, argv); + return(0); } - /* Punctuation found. */ + if (MDOC_LINEARG_MAX == argc) { + mdoc_argv_free(argc - 1, argv); + return(perr(mdoc, line, ppos, EARGVLIM)); + } - p = args[j]; /* Save argument (NULL-ified in append). */ + c = mdoc_elem_alloc(mdoc, line, ppos, tok, argc, argv); - if ( ! lastpunct) - if ( ! append_text(mdoc, tok, lasttok, j, args)) + if (0 == c) { + mdoc_argv_free(argc, argv); + return(0); + } + + mdoc->next = MDOC_NEXT_CHILD; + + lastpunct = 0; + for (;;) { + la = *pos; + w = mdoc_args(mdoc, line, pos, buf, tok, &p); + assert(ARGS_PHRASE != c); + + if (ARGS_ERROR == w) { + mdoc_argv_free(argc, argv); return(0); + } - args[j] = p; + if (ARGS_EOLN == w) + break; + if (ARGS_PUNCT == w) + break; - mdoc_word_alloc(mdoc, lastarg, args[j]); - lastpunct = 1; - j = 0; + c = ARGS_QWORD == w ? MDOC_MAX : + lookup(mdoc, line, la, tok, p); - goto again; + if (MDOC_MAX != c && -1 != c) { + if (0 == lastpunct && ! rewind_elem(mdoc, tok)) { + mdoc_argv_free(argc, argv); + return(0); + } + mdoc_argv_free(argc, argv); + c = mdoc_macro(mdoc, c, line, la, pos, buf); + if (0 == c) + return(0); + if (ppos > 1) + return(1); + return(append_delims(mdoc, line, pos, buf)); + } else if (-1 == c) { + mdoc_argv_free(argc, argv); + return(0); + } - /* NOTREACHED */ + if (ARGS_QWORD != w && mdoc_isdelim(p)) { + if (0 == lastpunct && ! rewind_elem(mdoc, tok)) { + mdoc_argv_free(argc, argv); + return(0); + } + lastpunct = 1; + } else if (lastpunct) { + c = mdoc_elem_alloc(mdoc, line, + ppos, tok, argc, argv); + if (0 == c) { + mdoc_argv_free(argc, argv); + return(0); + } + mdoc->next = MDOC_NEXT_CHILD; + lastpunct = 0; + } + + if ( ! mdoc_word_alloc(mdoc, line, la, p)) + return(0); + mdoc->next = MDOC_NEXT_SIBLING; + } + + mdoc_argv_free(argc, argv); + + if (0 == lastpunct && ! rewind_elem(mdoc, tok)) + return(0); + if (ppos > 1) + return(1); + return(append_delims(mdoc, line, pos, buf)); } +/* + * Handle explicit-scope (having a different closure token) and implicit + * scope (closing out prior scopes when re-invoked) macros. These + * constitute the BLOCK type and usually span multiple lines. These + * always have HEAD and sometimes have BODY types. In the multi-line + * case: + * + * .Bd -ragged + * Text. + * .Fl macro + * Another. + * .Ed + * + * BLOCK (.Bd) + * HEAD + * BODY + * TEXT (`Text.') + * ELEMENT (.Fl) + * TEXT (`macro') + * TEXT (`Another.') + * + * Note that the `.It' macro, possibly the most difficult (as it has + * embedded scope, etc.) is handled by this routine. + */ int -macro_prologue_dtitle(MACRO_PROT_ARGS) +macro_scoped(MACRO_PROT_ARGS) { - int c, lastarg, j; - char *args[MDOC_LINEARG_MAX]; + int c, lastarg, argc; + struct mdoc_arg argv[MDOC_LINEARG_MAX]; + char *p; - if (SEC_PROLOGUE != mdoc->sec_lastn) - return(mdoc_err(mdoc, tok, ppos, ERR_SEC_NPROLOGUE)); - if (0 == mdoc->meta.date) - return(mdoc_err(mdoc, tok, ppos, ERR_SEC_PROLOGUE_OO)); - if (mdoc->meta.title[0]) - return(mdoc_err(mdoc, tok, ppos, ERR_SEC_PROLOGUE_REP)); + assert ( ! (MDOC_CALLABLE & mdoc_macros[tok].flags)); - j = -1; + /* First rewind extant implicit scope. */ -again: - lastarg = *pos; - c = args_next(mdoc, tok, pos, buf, &args[++j]); + if ( ! (MDOC_EXPLICIT & mdoc_macros[tok].flags)) { + if ( ! rewind_subblock(MDOC_BODY, mdoc, tok, line, ppos)) + return(0); + if ( ! rewind_impblock(mdoc, tok, line, ppos)) + return(0); + } - if (0 == c) { - mdoc->sec_lastn = mdoc->sec_last = SEC_BODY; /* FIXME */ - if (mdoc->meta.title) - return(1); - if ( ! mdoc_warn(mdoc, tok, ppos, WARN_ARGS_GE1)) + /* Parse arguments. */ + + for (argc = 0; argc < MDOC_LINEARG_MAX; argc++) { + lastarg = *pos; + c = mdoc_argv(mdoc, line, tok, &argv[argc], pos, buf); + if (ARGV_EOLN == c) + break; + if (ARGV_WORD == c) { + *pos = lastarg; + break; + } else if (ARGV_ARG == c) + continue; + mdoc_argv_free(argc, argv); + return(0); + } + + if (MDOC_LINEARG_MAX == argc) { + mdoc_argv_free(argc - 1, argv); + return(perr(mdoc, line, ppos, EARGVLIM)); + } + + c = mdoc_block_alloc(mdoc, line, ppos, + tok, (size_t)argc, argv); + mdoc_argv_free(argc, argv); + + if (0 == c) + return(0); + + mdoc->next = MDOC_NEXT_CHILD; + + if (0 == buf[*pos]) { + if ( ! mdoc_head_alloc(mdoc, line, ppos, tok)) return(0); - (void)xstrlcpy(mdoc->meta.title, - "UNTITLED", META_TITLE_SZ); + if ( ! rewind_subblock(MDOC_HEAD, mdoc, tok, line, ppos)) + return(0); + if ( ! mdoc_body_alloc(mdoc, line, ppos, tok)) + return(0); + mdoc->next = MDOC_NEXT_CHILD; return(1); - } else if (-1 == c) + } + + if ( ! mdoc_head_alloc(mdoc, line, ppos, tok)) return(0); + mdoc->next = MDOC_NEXT_CHILD; + + for (;;) { + lastarg = *pos; + c = mdoc_args(mdoc, line, pos, buf, tok, &p); + + if (ARGS_ERROR == c) + return(0); + if (ARGS_PUNCT == c) + break; + if (ARGS_EOLN == c) + break; + + if (ARGS_PHRASE == c) { + /* + if ( ! mdoc_phrase(mdoc, line, lastarg, buf)) + return(0); + */ + continue; + } + + /* FIXME: if .It -column, the lookup must be for a + * sub-line component. BLAH. */ - if (MDOC_MAX != mdoc_find(mdoc, args[j]) && ! mdoc_warn - (mdoc, tok, lastarg, WARN_SYNTAX_MACLIKE)) + if (-1 == (c = lookup(mdoc, line, lastarg, tok, p))) + return(0); + + if (MDOC_MAX == c) { + if ( ! mdoc_word_alloc(mdoc, line, lastarg, p)) + return(0); + mdoc->next = MDOC_NEXT_SIBLING; + continue; + } + + if ( ! mdoc_macro(mdoc, c, line, lastarg, pos, buf)) + return(0); + break; + } + + if ( ! rewind_subblock(MDOC_HEAD, mdoc, tok, line, ppos)) return(0); + if (1 == ppos && ! append_delims(mdoc, line, pos, buf)) + return(0); - if (0 == j) { - if (xstrlcpy(mdoc->meta.title, args[0], META_TITLE_SZ)) - goto again; - return(mdoc_err(mdoc, tok, lastarg, ERR_SYNTAX_ARGS)); + if ( ! mdoc_body_alloc(mdoc, line, ppos, tok)) + return(0); + mdoc->next = MDOC_NEXT_CHILD; - } else if (1 == j) { - mdoc->meta.msec = mdoc_atomsec(args[1]); - if (MSEC_DEFAULT != mdoc->meta.msec) - goto again; - return(mdoc_err(mdoc, tok, -1, ERR_SYNTAX_ARGS)); + return(1); +} - } else if (2 == j) { - mdoc->meta.vol = mdoc_atovol(args[2]); - if (VOL_DEFAULT != mdoc->meta.vol) - goto again; - mdoc->meta.arch = mdoc_atoarch(args[2]); - if (ARCH_DEFAULT != mdoc->meta.arch) - goto again; - return(mdoc_err(mdoc, tok, lastarg, ERR_SYNTAX_ARGS)); + +/* + * This handles a case of implicitly-scoped macro (BLOCK) limited to a + * single line. Instead of being closed out by a subsequent call to + * another macro, the scope is closed at the end of line. These don't + * have BODY or TAIL types. Notice that the punctuation falls outside + * of the HEAD type. + * + * .Qq a Fl b Ar d ; ; + * + * BLOCK (Qq) + * HEAD + * TEXT (`a') + * ELEMENT (.Fl) + * TEXT (`b') + * ELEMENT (.Ar) + * TEXT (`d') + * TEXT (`;') + * TEXT (`;') + */ +int +macro_scoped_line(MACRO_PROT_ARGS) +{ + int lastarg, c; + char *p; + + if ( ! mdoc_block_alloc(mdoc, line, ppos, tok, 0, NULL)) + return(0); + mdoc->next = MDOC_NEXT_CHILD; + + if ( ! mdoc_head_alloc(mdoc, line, ppos, tok)) + return(0); + mdoc->next = MDOC_NEXT_SIBLING; + if ( ! mdoc_body_alloc(mdoc, line, ppos, tok)) + return(0); + mdoc->next = MDOC_NEXT_CHILD; + + /* XXX - no known argument macros. */ + + lastarg = ppos; + for (;;) { + lastarg = *pos; + c = mdoc_args(mdoc, line, pos, buf, tok, &p); + assert(ARGS_PHRASE != c); + + if (ARGS_ERROR == c) + return(0); + if (ARGS_PUNCT == c) + break; + if (ARGS_EOLN == c) + break; + + if (-1 == (c = lookup(mdoc, line, lastarg, tok, p))) + return(0); + else if (MDOC_MAX == c) { + if ( ! mdoc_word_alloc(mdoc, line, lastarg, p)) + return(0); + mdoc->next = MDOC_NEXT_SIBLING; + continue; + } + + if ( ! mdoc_macro(mdoc, c, line, lastarg, pos, buf)) + return(0); + break; } - return(mdoc_err(mdoc, tok, lastarg, ERR_ARGS_MANY)); + if (1 == ppos) { + if ( ! rewind_subblock(MDOC_BODY, mdoc, tok, line, ppos)) + return(0); + if ( ! append_delims(mdoc, line, pos, buf)) + return(0); + } else if ( ! rewind_subblock(MDOC_BODY, mdoc, tok, line, ppos)) + return(0); + return(rewind_impblock(mdoc, tok, line, ppos)); } +/* + * A constant-scoped macro is like a simple-scoped macro (mdoc_scoped) + * except that it doesn't handle implicit scopes and explicit ones have + * a fixed number of TEXT children to the BODY. + * + * .Fl a So b Sc ; + * + * ELEMENT (.Fl) + * TEXT (`a') + * BLOCK (.So) + * HEAD + * BODY + * TEXT (`b') + * TEXT (';') + */ int -macro_prologue_ddate(MACRO_PROT_ARGS) +macro_constant_scoped(MACRO_PROT_ARGS) { - int c, lastarg, j; - char *args[MDOC_LINEARG_MAX], date[64]; + int lastarg, flushed, j, c, maxargs; + char *p; - if (SEC_PROLOGUE != mdoc->sec_lastn) - return(mdoc_err(mdoc, tok, ppos, ERR_SEC_NPROLOGUE)); - if (mdoc->meta.title[0]) - return(mdoc_err(mdoc, tok, ppos, ERR_SEC_PROLOGUE_OO)); - if (mdoc->meta.date) - return(mdoc_err(mdoc, tok, ppos, ERR_SEC_PROLOGUE_REP)); + lastarg = ppos; + flushed = 0; - j = -1; - date[0] = 0; + switch (tok) { + case (MDOC_Eo): + maxargs = 1; + break; + default: + maxargs = 0; + break; + } -again: + if ( ! mdoc_block_alloc(mdoc, line, ppos, tok, 0, NULL)) + return(0); + mdoc->next = MDOC_NEXT_CHILD; - lastarg = *pos; - c = args_next(mdoc, tok, pos, buf, &args[++j]); - if (0 == c) { - if (mdoc->meta.date) - return(1); - mdoc->meta.date = mdoc_atotime(date); - if (mdoc->meta.date) - return(1); - return(mdoc_err(mdoc, tok, ppos, ERR_SYNTAX_ARGS)); - } else if (-1 == c) + if (0 == maxargs) { + if ( ! mdoc_head_alloc(mdoc, line, ppos, tok)) + return(0); + if ( ! rewind_subblock(MDOC_HEAD, mdoc, tok, line, ppos)) + return(0); + if ( ! mdoc_body_alloc(mdoc, line, ppos, tok)) + return(0); + flushed = 1; + } else if ( ! mdoc_head_alloc(mdoc, line, ppos, tok)) return(0); + + mdoc->next = MDOC_NEXT_CHILD; + + for (j = 0; /* No sentinel. */; j++) { + lastarg = *pos; + + if (j == maxargs && ! flushed) { + if ( ! rewind_subblock(MDOC_HEAD, mdoc, tok, line, ppos)) + return(0); + flushed = 1; + if ( ! mdoc_body_alloc(mdoc, line, ppos, tok)) + return(0); + mdoc->next = MDOC_NEXT_CHILD; + } + + c = mdoc_args(mdoc, line, pos, buf, tok, &p); + assert(ARGS_PHRASE != c); + + if (ARGS_ERROR == c) + return(0); + if (ARGS_PUNCT == c) + break; + if (ARGS_EOLN == c) + break; + + if (-1 == (c = lookup(mdoc, line, lastarg, tok, p))) + return(0); + else if (MDOC_MAX != c) { + if ( ! flushed) { + if ( ! rewind_subblock(MDOC_HEAD, mdoc, tok, line, ppos)) + return(0); + flushed = 1; + if ( ! mdoc_body_alloc(mdoc, line, ppos, tok)) + return(0); + mdoc->next = MDOC_NEXT_CHILD; + } + if ( ! mdoc_macro(mdoc, c, line, lastarg, pos, buf)) + return(0); + break; + } + + if ( ! flushed && mdoc_isdelim(p)) { + if ( ! rewind_subblock(MDOC_HEAD, mdoc, tok, line, ppos)) + return(0); + flushed = 1; + if ( ! mdoc_body_alloc(mdoc, line, ppos, tok)) + return(0); + mdoc->next = MDOC_NEXT_CHILD; + } - if (MDOC_MAX != mdoc_find(mdoc, args[j]) && ! mdoc_warn - (mdoc, tok, lastarg, WARN_SYNTAX_MACLIKE)) - return(0); - - if (0 == j) { - if (xstrcmp("$Mdocdate: December 17 2008 $", args[j])) { - mdoc->meta.date = time(NULL); - goto again; - } else if (xstrcmp("$Mdocdate:", args[j])) - goto again; - } else if (4 == j) - if ( ! xstrcmp("$", args[j])) - goto again; + if ( ! mdoc_word_alloc(mdoc, line, lastarg, p)) + return(0); + mdoc->next = MDOC_NEXT_SIBLING; + } - if ( ! xstrlcat(date, args[j], sizeof(date))) - return(mdoc_err(mdoc, tok, lastarg, ERR_SYNTAX_ARGS)); - if ( ! xstrlcat(date, " ", sizeof(date))) - return(mdoc_err(mdoc, tok, lastarg, ERR_SYNTAX_ARGS)); + if ( ! flushed) { + if ( ! rewind_subblock(MDOC_HEAD, mdoc, tok, line, ppos)) + return(0); + if ( ! mdoc_body_alloc(mdoc, line, ppos, tok)) + return(0); + mdoc->next = MDOC_NEXT_CHILD; + } - goto again; - /* NOTREACHED */ + if (ppos > 1) + return(1); + return(append_delims(mdoc, line, pos, buf)); } +/* + * A delimited constant is very similar to the macros parsed by + * macro_text except that, in the event of punctuation, the macro isn't + * "re-opened" as it is in macro_text. Also, these macros have a fixed + * number of parameters. + * + * .Fl a No b + * + * ELEMENT (.Fl) + * TEXT (`a') + * ELEMENT (.No) + * TEXT (`b') + */ int -macro_scoped_implicit(MACRO_PROT_ARGS) +macro_constant_delimited(MACRO_PROT_ARGS) { - int t, c, lastarg, j; - char *args[MDOC_LINEARG_MAX]; - struct mdoc_node *n; + int lastarg, flushed, j, c, maxargs, argc; + struct mdoc_arg argv[MDOC_LINEARG_MAX]; + char *p; - assert( ! (MDOC_EXPLICIT & mdoc_macros[tok].flags)); + lastarg = ppos; + flushed = 0; - if (SEC_PROLOGUE == mdoc->sec_lastn) - return(mdoc_err(mdoc, tok, ppos, ERR_SEC_PROLOGUE)); + switch (tok) { + case (MDOC_No): + /* FALLTHROUGH */ + case (MDOC_Ns): + /* FALLTHROUGH */ + case (MDOC_Pf): + /* FALLTHROUGH */ + case (MDOC_Ux): + /* FALLTHROUGH */ + case (MDOC_St): + maxargs = 0; + break; + default: + maxargs = 1; + break; + } - /* LINTED */ - for (n = mdoc->last; n; n = n->parent) { - if (MDOC_BLOCK != n->type) - continue; - if (tok == (t = n->data.block.tok)) + for (argc = 0; argc < MDOC_LINEARG_MAX; argc++) { + lastarg = *pos; + c = mdoc_argv(mdoc, line, tok, &argv[argc], pos, buf); + if (ARGV_EOLN == c) break; - if ( ! (MDOC_EXPLICIT & mdoc_macros[t].flags)) + if (ARGV_WORD == c) { + *pos = lastarg; + break; + } else if (ARGV_ARG == c) continue; - return(mdoc_err(mdoc, tok, ppos, ERR_SCOPE_BREAK)); + mdoc_argv_free(argc, argv); + return(0); } - if (n) { - mdoc->last = n; - mdoc_msg(mdoc, ppos, "scope: rewound `%s'", - mdoc_macronames[tok]); - } else - mdoc_msg(mdoc, ppos, "scope: new `%s'", - mdoc_macronames[tok]); + if (MDOC_LINEARG_MAX == argc) { + mdoc_argv_free(argc - 1, argv); + return(perr(mdoc, line, ppos, EARGVLIM)); + } - j = 0; + c = mdoc_elem_alloc(mdoc, line, ppos, tok, argc, argv); + mdoc_argv_free(argc, argv); -again: + if (0 == c) + return(0); - lastarg = *pos; - c = args_next(mdoc, tok, pos, buf, &args[j]); + mdoc->next = MDOC_NEXT_CHILD; + + for (j = 0; /* No sentinel. */; j++) { + lastarg = *pos; + + if (j == maxargs && ! flushed) { + if ( ! rewind_elem(mdoc, tok)) + return(0); + flushed = 1; + } + + c = mdoc_args(mdoc, line, pos, buf, tok, &p); + assert(ARGS_PHRASE != c); + + if (ARGS_ERROR == c) + return(0); + if (ARGS_PUNCT == c) + break; + if (ARGS_EOLN == c) + break; + + if (-1 == (c = lookup(mdoc, line, lastarg, tok, p))) + return(0); + else if (MDOC_MAX != c) { + if ( ! flushed && ! rewind_elem(mdoc, tok)) + return(0); + flushed = 1; + if ( ! mdoc_macro(mdoc, c, line, lastarg, pos, buf)) + return(0); + break; + } + + if ( ! flushed && mdoc_isdelim(p)) { + if ( ! rewind_elem(mdoc, tok)) + return(0); + flushed = 1; + } - if (-1 == c) + if ( ! mdoc_word_alloc(mdoc, line, lastarg, p)) + return(0); + mdoc->next = MDOC_NEXT_SIBLING; + } + + if ( ! flushed && ! rewind_elem(mdoc, tok)) return(0); + + if (ppos > 1) + return(1); + return(append_delims(mdoc, line, pos, buf)); +} + + +/* + * A constant macro is the simplest classification. It spans an entire + * line. + */ +int +macro_constant(MACRO_PROT_ARGS) +{ + int c, w, la, argc; + struct mdoc_arg argv[MDOC_LINEARG_MAX]; + char *p; + + assert( ! (MDOC_CALLABLE & mdoc_macros[tok].flags)); + + for (argc = 0; argc < MDOC_LINEARG_MAX; argc++) { + la = *pos; + c = mdoc_argv(mdoc, line, tok, &argv[argc], pos, buf); + if (ARGV_EOLN == c) + break; + if (ARGV_WORD == c) { + *pos = la; + break; + } else if (ARGV_ARG == c) + continue; + + mdoc_argv_free(argc, argv); + return(0); + } + + if (MDOC_LINEARG_MAX == argc) { + mdoc_argv_free(argc - 1, argv); + return(perr(mdoc, line, ppos, EARGVLIM)); + } + + c = mdoc_elem_alloc(mdoc, line, ppos, tok, argc, argv); + mdoc_argv_free(argc, argv); + if (0 == c) - return(append_scoped(mdoc, tok, ppos, j, args)); + return(0); - /* Command found. */ + mdoc->next = MDOC_NEXT_CHILD; - if (MDOC_MAX != (c = mdoc_find(mdoc, args[j]))) - if ( ! mdoc_warn(mdoc, tok, lastarg, WARN_SYNTAX_MACLIKE)) + for (;;) { + la = *pos; + w = mdoc_args(mdoc, line, pos, buf, tok, &p); + assert(ARGS_PHRASE != c); + + if (ARGS_ERROR == w) return(0); + if (ARGS_EOLN == w) + break; - /* Word found. */ + c = ARGS_QWORD == w ? MDOC_MAX : + lookup(mdoc, line, la, tok, p); - j++; - goto again; + if (MDOC_MAX != c && -1 != c) { + if ( ! rewind_elem(mdoc, tok)) + return(0); + return(mdoc_macro(mdoc, c, line, la, pos, buf)); + } else if (-1 == c) + return(0); - /* NOTREACHED */ + if ( ! mdoc_word_alloc(mdoc, line, la, p)) + return(0); + mdoc->next = MDOC_NEXT_SIBLING; + } + + return(rewind_elem(mdoc, tok)); } -static int -xstrcmp(const char *p1, const char *p2) +/* ARGSUSED */ +int +macro_obsolete(MACRO_PROT_ARGS) { - return(0 == strcmp(p1, p2)); + return(pwarn(mdoc, line, ppos, WOBS)); } -static int -xstrlcat(char *dst, const char *src, size_t sz) +/* + * This is called at the end of parsing. It must traverse up the tree, + * closing out open [implicit] scopes. Obviously, open explicit scopes + * are errors. + */ +int +macro_end(struct mdoc *mdoc) { + struct mdoc_node *n; - return(strlcat(dst, src, sz) < sz); -} + assert(mdoc->first); + assert(mdoc->last); + /* Scan for open explicit scopes. */ -static int -xstrlcpy(char *dst, const char *src, size_t sz) -{ + n = MDOC_VALID & mdoc->last->flags ? + mdoc->last->parent : mdoc->last; - return(strlcpy(dst, src, sz) < sz); + for ( ; n; n = n->parent) { + if (MDOC_BLOCK != n->type) + continue; + if ( ! (MDOC_EXPLICIT & mdoc_macros[n->tok].flags)) + continue; + return(mdoc_nerr(mdoc, n, "macro scope still open on exit")); + } + + return(rewind_last(mdoc, mdoc->first)); }