=================================================================== RCS file: /cvs/mandoc/Attic/macro.c,v retrieving revision 1.42 retrieving revision 1.49 diff -u -p -r1.42 -r1.49 --- mandoc/Attic/macro.c 2009/01/19 17:02:58 1.42 +++ mandoc/Attic/macro.c 2009/01/22 14:56:21 1.49 @@ -1,4 +1,4 @@ -/* $Id: macro.c,v 1.42 2009/01/19 17:02:58 kristaps Exp $ */ +/* $Id: macro.c,v 1.49 2009/01/22 14:56:21 kristaps Exp $ */ /* * Copyright (c) 2008 Kristaps Dzonsons * @@ -25,19 +25,66 @@ #include #endif +/* + * This has scanning/parsing routines, each of which extracts a macro and + * its arguments and parameters, then knows how to progress to the next + * macro. Macros are parsed as follows: + * + * ELEMENT: TEXT | epsilon + * BLOCK: HEAD PUNCT BODY PUNCT BLOCK_TAIL PUNCT + * BLOCK_TAIL: TAIL | epsilon + * HEAD: ELEMENT | TEXT | BLOCK | epsilon + * BODY: ELEMENT | TEXT | BLOCK | epsilon + * TAIL: TEXT | epsilon + * PUNCT: TEXT (delimiters) | epsilon + * + * These are arranged into a parse tree, an example of which follows: + * + * ROOT + * BLOCK (.Sh) + * HEAD + * TEXT (`NAME') + * BODY + * ELEMENT (.Nm) + * TEXT (`mdocml') + * ELEMENT (.Nd) + * TEXT (`mdoc macro compiler') + * BLOCK (.Op) + * HEAD + * ELEMENT (.Fl) + * TEXT (`v') + * BLOCK (.Op) + * HEAD + * ELEMENT (.Fl) + * TEXT (`v') + * ELEMENT (.Fl) + * TEXT (`W') + * ELEMENT (.Ns) + * ELEMENT (.Ar) + * TEXT (`err...') + * + * These types are always per-line except for block bodies, which may + * span multiple lines. Macros are assigned a parsing routine, which + * corresponds to the type, in the mdoc_macros table. + * + * Note that types are general: there can be several parsing routines + * corresponding to a single type. The macro_text function, for + * example, parses an ELEMENT type (see the function definition for + * details) that may be interrupted by further macros; the + * macro_constant function, on the other hand, parses an ELEMENT type + * spanning a single line.
+ */ + #include "private.h" -/* FIXME: maxlineargs should be per LINE, no per TOKEN. */ - -static int rewind_alt(int); -static int rewind_dohalt(int, enum mdoc_type, - const struct mdoc_node *); #define REWIND_REWIND (1 << 0) #define REWIND_NOHALT (1 << 1) #define REWIND_HALT (1 << 2) -static int rewind_dobreak(int, const struct mdoc_node *); - +static int rewind_dohalt(int, enum mdoc_type, + const struct mdoc_node *); +static int rewind_alt(int); +static int rewind_dobreak(int, const struct mdoc_node *); static int rewind_elem(struct mdoc *, int); static int rewind_impblock(struct mdoc *, int, int, int); static int rewind_expblock(struct mdoc *, int, int, int); @@ -46,9 +93,66 @@ static int rewind_subblock(enum mdoc_type, static int rewind_last(struct mdoc *, struct mdoc_node *); static int append_delims(struct mdoc *, int, int *, char *); static int lookup(struct mdoc *, int, int, int, const char *); +static int pwarn(struct mdoc *, int, int, int); +static int perr(struct mdoc *, int, int, int); +#define WMACPARM (1) +#define WOBS (2) +#define ENOCTX (1) +#define ENOPARMS (2) +#define EARGVLIM (3) + + static int +perr(struct mdoc *mdoc, int line, int pos, int type) +{ + int c; + + switch (type) { + case (ENOCTX): + c = mdoc_perr(mdoc, line, pos, + "closing macro has prior context"); + break; + case (ENOPARMS): + c = mdoc_perr(mdoc, line, pos, + "macro doesn't expect parameters"); + break; + case (EARGVLIM): + c = mdoc_perr(mdoc, line, pos, + "argument hard-limit %d reached", + MDOC_LINEARG_MAX); + break; + default: + abort(); + /* NOTREACHED */ + } + return(c); +} + +static int +pwarn(struct mdoc *mdoc, int line, int pos, int type) +{ + int c; + + switch (type) { + case (WMACPARM): + c = mdoc_pwarn(mdoc, line, pos, WARN_SYNTAX, + "macro-like parameter"); + break; + case (WOBS): + c = mdoc_pwarn(mdoc, line, pos, WARN_SYNTAX, + "macro is marked obsolete"); + break; + default: + abort(); + /* NOTREACHED */ + } + return(c); +} + + +static int lookup(struct mdoc 
*mdoc, int line, int pos, int from, const char *p) { int res; @@ -58,7 +162,7 @@ lookup(struct mdoc *mdoc, int line, int pos, int from, return(res); if (MDOC_MAX == res) return(res); - if ( ! mdoc_pwarn(mdoc, line, pos, WARN_SYNTAX, "macro-like parameter")) + if ( ! pwarn(mdoc, line, pos, WMACPARM)) return(-1); return(MDOC_MAX); } @@ -71,6 +175,7 @@ rewind_last(struct mdoc *mdoc, struct mdoc_node *to) assert(to); mdoc->next = MDOC_NEXT_SIBLING; + /* LINTED */ while (mdoc->last != to) { if ( ! mdoc_valid_post(mdoc)) return(0); @@ -158,7 +263,7 @@ rewind_dohalt(int tok, enum mdoc_type type, const stru case (MDOC_Qq): /* FALLTHROUGH */ case (MDOC_Sq): - assert(MDOC_BODY != type); + assert(MDOC_HEAD != type); assert(MDOC_TAIL != type); if (type == p->type && tok == p->tok) return(REWIND_REWIND); @@ -350,7 +455,7 @@ rewind_expblock(struct mdoc *mdoc, int tok, int line, for (n = mdoc->last; n; n = n->parent) { c = rewind_dohalt(tok, MDOC_BLOCK, n); if (REWIND_HALT == c) - return(mdoc_perr(mdoc, line, ppos, "closing macro has no context")); + return(perr(mdoc, line, ppos, ENOCTX)); if (REWIND_REWIND == c) break; else if (rewind_dobreak(tok, n)) @@ -398,6 +503,8 @@ append_delims(struct mdoc *mdoc, int line, int *pos, c for (;;) { lastarg = *pos; c = mdoc_args(mdoc, line, pos, buf, 0, &p); + assert(ARGS_PHRASE != c); + if (ARGS_ERROR == c) return(0); else if (ARGS_EOLN == c) @@ -412,6 +519,10 @@ append_delims(struct mdoc *mdoc, int line, int *pos, c } +/* + * Close out an explicit scope. This optionally parses a TAIL type with + * a set number of TEXT children. + */ int macro_scoped_close(MACRO_PROT_ARGS) { @@ -438,7 +549,7 @@ macro_scoped_close(MACRO_PROT_ARGS) return(0); return(rewind_expblock(mdoc, tok, line, ppos)); } - return(mdoc_perr(mdoc, line, ppos, "macro expects no parameters")); + return(perr(mdoc, line, ppos, ENOPARMS)); } if ( ! 
rewind_subblock(MDOC_BODY, mdoc, tok, line, ppos)) @@ -462,7 +573,9 @@ macro_scoped_close(MACRO_PROT_ARGS) flushed = 1; } - c = mdoc_args(mdoc, line, pos, buf, ARGS_DELIM, &p); + c = mdoc_args(mdoc, line, pos, buf, tok, &p); + assert(ARGS_PHRASE != c); + if (ARGS_ERROR == c) return(0); if (ARGS_PUNCT == c) @@ -497,10 +610,34 @@ macro_scoped_close(MACRO_PROT_ARGS) } +/* + * A general text macro. This is a complex case because of punctuation. + * If a text macro is followed by words, then punctuation, the macro is + * "stopped" and "reopened" following the punctuation. Thus, the + * following arises: + * + * .Fl a ; b + * + * ELEMENT (.Fl) + * TEXT (`a') + * TEXT (`;') + * ELEMENT (.Fl) + * TEXT (`b') + * + * This must handle the following situations: + * + * .Fl Ar b ; ; + * + * ELEMENT (.Fl) + * ELEMENT (.Ar) + * TEXT (`b') + * TEXT (`;') + * TEXT (`;') + */ int macro_text(MACRO_PROT_ARGS) { - int la, lastpunct, c, fl, argc; + int la, lastpunct, c, w, argc; struct mdoc_arg argv[MDOC_LINEARG_MAX]; char *p; @@ -524,7 +661,7 @@ macro_text(MACRO_PROT_ARGS) if (MDOC_LINEARG_MAX == argc) { mdoc_argv_free(argc - 1, argv); - return(mdoc_perr(mdoc, line, ppos, "parameter hard-limit exceeded")); + return(perr(mdoc, line, ppos, EARGVLIM)); } c = mdoc_elem_alloc(mdoc, line, ppos, tok, argc, argv); @@ -536,42 +673,43 @@ macro_text(MACRO_PROT_ARGS) mdoc->next = MDOC_NEXT_CHILD; - fl = ARGS_DELIM; - if (MDOC_QUOTABLE & mdoc_macros[tok].flags) - fl |= ARGS_QUOTED; - lastpunct = 0; for (;;) { la = *pos; - c = mdoc_args(mdoc, line, pos, buf, fl, &p); - if (ARGS_ERROR == c) { + w = mdoc_args(mdoc, line, pos, buf, tok, &p); + assert(ARGS_PHRASE != w); + + if (ARGS_ERROR == w) { mdoc_argv_free(argc, argv); return(0); } - if (ARGS_EOLN == c) + if (ARGS_EOLN == w) break; - if (ARGS_PUNCT == c) + if (ARGS_PUNCT == w) break; - if (-1 == (c = lookup(mdoc, line, la, tok, p))) - return(0); - else if (MDOC_MAX != c) { + c = ARGS_QWORD == w ?
MDOC_MAX : + lookup(mdoc, line, la, tok, p); + + if (MDOC_MAX != c && -1 != c) { if (0 == lastpunct && ! rewind_elem(mdoc, tok)) { mdoc_argv_free(argc, argv); return(0); } mdoc_argv_free(argc, argv); - c = mdoc_macro(mdoc, c, line, la, pos, buf); if (0 == c) return(0); if (ppos > 1) return(1); return(append_delims(mdoc, line, pos, buf)); + } else if (-1 == c) { + mdoc_argv_free(argc, argv); + return(0); } - if (mdoc_isdelim(p)) { + if (ARGS_QWORD != w && mdoc_isdelim(p)) { if (0 == lastpunct && ! rewind_elem(mdoc, tok)) { mdoc_argv_free(argc, argv); return(0); @@ -603,15 +741,41 @@ macro_text(MACRO_PROT_ARGS) } +/* + * Handle explicit-scope (having a different closure token) and implicit + * scope (closing out prior scopes when re-invoked) macros. These + * constitute the BLOCK type and usually span multiple lines. These + * always have HEAD and sometimes have BODY types. In the multi-line + * case: + * + * .Bd -ragged + * Text. + * .Fl macro + * Another. + * .Ed + * + * BLOCK (.Bd) + * HEAD + * BODY + * TEXT (`Text.') + * ELEMENT (.Fl) + * TEXT (`macro') + * TEXT (`Another.') + * + * Note that the `.It' macro, possibly the most difficult (as it has + * embedded scope, etc.) is handled by this routine. + */ int macro_scoped(MACRO_PROT_ARGS) { - int c, lastarg, argc, fl; + int c, lastarg, argc; struct mdoc_arg argv[MDOC_LINEARG_MAX]; char *p; assert ( ! (MDOC_CALLABLE & mdoc_macros[tok].flags)); + /* First rewind extant implicit scope. */ + if ( ! (MDOC_EXPLICIT & mdoc_macros[tok].flags)) { if ( ! rewind_subblock(MDOC_BODY, mdoc, tok, line, ppos)) return(0); @@ -619,6 +783,8 @@ macro_scoped(MACRO_PROT_ARGS) return(0); } + /* Parse arguments. 
*/ + for (argc = 0; argc < MDOC_LINEARG_MAX; argc++) { lastarg = *pos; c = mdoc_argv(mdoc, line, tok, &argv[argc], pos, buf); @@ -635,7 +801,7 @@ macro_scoped(MACRO_PROT_ARGS) if (MDOC_LINEARG_MAX == argc) { mdoc_argv_free(argc - 1, argv); - return(mdoc_perr(mdoc, line, ppos, "parameter hard-limit exceeded")); + return(perr(mdoc, line, ppos, EARGVLIM)); } c = mdoc_block_alloc(mdoc, line, ppos, @@ -662,24 +828,32 @@ macro_scoped(MACRO_PROT_ARGS) return(0); mdoc->next = MDOC_NEXT_CHILD; - fl = ARGS_DELIM; - if (MDOC_TABSEP & mdoc_macros[tok].flags) - fl |= ARGS_TABSEP; - for (;;) { lastarg = *pos; - c = mdoc_args(mdoc, line, pos, buf, fl, &p); - + c = mdoc_args(mdoc, line, pos, buf, tok, &p); + if (ARGS_ERROR == c) return(0); if (ARGS_PUNCT == c) break; if (ARGS_EOLN == c) break; + + if (ARGS_PHRASE == c) { + /* + if ( ! mdoc_phrase(mdoc, line, lastarg, buf)) + return(0); + */ + continue; + } + + /* FIXME: if .It -column, the lookup must be for a + * sub-line component. BLAH. */ if (-1 == (c = lookup(mdoc, line, lastarg, tok, p))) return(0); - else if (MDOC_MAX == c) { + + if (MDOC_MAX == c) { if ( ! mdoc_word_alloc(mdoc, line, lastarg, p)) return(0); mdoc->next = MDOC_NEXT_SIBLING; @@ -690,7 +864,7 @@ macro_scoped(MACRO_PROT_ARGS) return(0); break; } - + if ( ! rewind_subblock(MDOC_HEAD, mdoc, tok, line, ppos)) return(0); if (1 == ppos && ! append_delims(mdoc, line, pos, buf)) @@ -704,6 +878,25 @@ macro_scoped(MACRO_PROT_ARGS) } +/* + * This handles a case of implicitly-scoped macro (BLOCK) limited to a + * single line. Instead of being closed out by a subsequent call to + * another macro, the scope is closed at the end of line. These don't + * have BODY or TAIL types. Notice that the punctuation falls outside + * of the HEAD type. 
+ * + * .Qq a Fl b Ar d ; ; + * + * BLOCK (.Qq) + * HEAD + * TEXT (`a') + * ELEMENT (.Fl) + * TEXT (`b') + * ELEMENT (.Ar) + * TEXT (`d') + * TEXT (`;') + * TEXT (`;') + */ int macro_scoped_line(MACRO_PROT_ARGS) { @@ -716,6 +909,9 @@ macro_scoped_line(MACRO_PROT_ARGS) if ( ! mdoc_head_alloc(mdoc, line, ppos, tok)) return(0); + mdoc->next = MDOC_NEXT_SIBLING; + if ( ! mdoc_body_alloc(mdoc, line, ppos, tok)) + return(0); mdoc->next = MDOC_NEXT_CHILD; /* XXX - no known argument macros. */ @@ -723,7 +919,8 @@ macro_scoped_line(MACRO_PROT_ARGS) lastarg = ppos; for (;;) { lastarg = *pos; - c = mdoc_args(mdoc, line, pos, buf, ARGS_DELIM, &p); + c = mdoc_args(mdoc, line, pos, buf, tok, &p); + assert(ARGS_PHRASE != c); if (ARGS_ERROR == c) return(0); @@ -747,16 +944,31 @@ macro_scoped_line(MACRO_PROT_ARGS) } if (1 == ppos) { - if ( ! rewind_subblock(MDOC_HEAD, mdoc, tok, line, ppos)) + if ( ! rewind_subblock(MDOC_BODY, mdoc, tok, line, ppos)) return(0); if ( ! append_delims(mdoc, line, pos, buf)) return(0); - } else if ( ! rewind_subblock(MDOC_HEAD, mdoc, tok, line, ppos)) + } else if ( ! rewind_subblock(MDOC_BODY, mdoc, tok, line, ppos)) return(0); return(rewind_impblock(mdoc, tok, line, ppos)); } +/* + * A constant-scoped macro is like a simple-scoped macro (macro_scoped) + * except that it doesn't handle implicit scopes and explicit ones have + * a fixed number of TEXT children to the BODY.
+ * + * .Fl a So b Sc ; + * + * ELEMENT (.Fl) + * TEXT (`a') + * BLOCK (.So) + * HEAD + * BODY + * TEXT (`b') + * TEXT (';') + */ int macro_constant_scoped(MACRO_PROT_ARGS) { @@ -804,7 +1016,9 @@ macro_constant_scoped(MACRO_PROT_ARGS) mdoc->next = MDOC_NEXT_CHILD; } - c = mdoc_args(mdoc, line, pos, buf, ARGS_DELIM, &p); + c = mdoc_args(mdoc, line, pos, buf, tok, &p); + assert(ARGS_PHRASE != c); + if (ARGS_ERROR == c) return(0); if (ARGS_PUNCT == c) @@ -856,6 +1070,19 @@ macro_constant_scoped(MACRO_PROT_ARGS) } +/* + * A delimited constant is very similar to the macros parsed by + * macro_text except that, in the event of punctuation, the macro isn't + * "re-opened" as it is in macro_text. Also, these macros have a fixed + * number of parameters. + * + * .Fl a No b + * + * ELEMENT (.Fl) + * TEXT (`a') + * ELEMENT (.No) + * TEXT (`b') + */ int macro_constant_delimited(MACRO_PROT_ARGS) { @@ -899,7 +1126,7 @@ macro_constant_delimited(MACRO_PROT_ARGS) if (MDOC_LINEARG_MAX == argc) { mdoc_argv_free(argc - 1, argv); - return(mdoc_perr(mdoc, line, ppos, "parameter hard-limit exceeded")); + return(perr(mdoc, line, ppos, EARGVLIM)); } c = mdoc_elem_alloc(mdoc, line, ppos, tok, argc, argv); @@ -919,7 +1146,9 @@ macro_constant_delimited(MACRO_PROT_ARGS) flushed = 1; } - c = mdoc_args(mdoc, line, pos, buf, ARGS_DELIM, &p); + c = mdoc_args(mdoc, line, pos, buf, tok, &p); + assert(ARGS_PHRASE != c); + if (ARGS_ERROR == c) return(0); if (ARGS_PUNCT == c) @@ -958,24 +1187,26 @@ macro_constant_delimited(MACRO_PROT_ARGS) } +/* + * A constant macro is the simplest classification. It spans an entire + * line. + */ int macro_constant(MACRO_PROT_ARGS) { - int c, lastarg, argc, fl; - struct mdoc_arg argv[MDOC_LINEARG_MAX]; - char *p; + int c, w, la, argc; + struct mdoc_arg argv[MDOC_LINEARG_MAX]; + char *p; - fl = 0; - if (MDOC_QUOTABLE & mdoc_macros[tok].flags) - fl = ARGS_QUOTED; + assert( ! 
(MDOC_CALLABLE & mdoc_macros[tok].flags)); for (argc = 0; argc < MDOC_LINEARG_MAX; argc++) { - lastarg = *pos; + la = *pos; c = mdoc_argv(mdoc, line, tok, &argv[argc], pos, buf); if (ARGV_EOLN == c) break; if (ARGV_WORD == c) { - *pos = lastarg; + *pos = la; break; } else if (ARGV_ARG == c) continue; @@ -986,7 +1217,7 @@ macro_constant(MACRO_PROT_ARGS) if (MDOC_LINEARG_MAX == argc) { mdoc_argv_free(argc - 1, argv); - return(mdoc_perr(mdoc, line, ppos, "parameter hard-limit exceeded")); + return(perr(mdoc, line, ppos, EARGVLIM)); } c = mdoc_elem_alloc(mdoc, line, ppos, tok, argc, argv); @@ -998,23 +1229,26 @@ macro_constant(MACRO_PROT_ARGS) mdoc->next = MDOC_NEXT_CHILD; for (;;) { - lastarg = *pos; - c = mdoc_args(mdoc, line, pos, buf, fl, &p); - if (ARGS_ERROR == c) + la = *pos; + w = mdoc_args(mdoc, line, pos, buf, tok, &p); + assert(ARGS_PHRASE != w); + + if (ARGS_ERROR == w) return(0); - if (ARGS_EOLN == c) + if (ARGS_EOLN == w) break; - if (-1 == (c = lookup(mdoc, line, lastarg, tok, p))) - return(0); - else if (MDOC_MAX != c) { + c = ARGS_QWORD == w ? MDOC_MAX : + lookup(mdoc, line, la, tok, p); + + if (MDOC_MAX != c && -1 != c) { if ( ! rewind_elem(mdoc, tok)) return(0); - return(mdoc_macro(mdoc, c, line, - lastarg, pos, buf)); - } + return(mdoc_macro(mdoc, c, line, la, pos, buf)); + } else if (-1 == c) + return(0); - if ( ! mdoc_word_alloc(mdoc, line, lastarg, p)) + if ( ! mdoc_word_alloc(mdoc, line, la, p)) return(0); mdoc->next = MDOC_NEXT_SIBLING; } @@ -1028,10 +1262,15 @@ int macro_obsolete(MACRO_PROT_ARGS) { - return(mdoc_pwarn(mdoc, line, ppos, WARN_SYNTAX, "macro is obsolete")); + return(pwarn(mdoc, line, ppos, WOBS)); } +/* + * This is called at the end of parsing. It must traverse up the tree, + * closing out open [implicit] scopes. Obviously, open explicit scopes + * are errors. + */ int macro_end(struct mdoc *mdoc) { @@ -1050,8 +1289,7 @@ macro_end(struct mdoc *mdoc) continue; if ( !
(MDOC_EXPLICIT & mdoc_macros[n->tok].flags)) continue; - mdoc_nerr(mdoc, n, "macro scope still open on exit"); - return(0); + return(mdoc_nerr(mdoc, n, "macro scope still open on exit")); } return(rewind_last(mdoc, mdoc->first));