=================================================================== RCS file: /cvs/mandoc/mdoc_validate.c,v retrieving revision 1.1 retrieving revision 1.13 diff -u -p -r1.1 -r1.13 --- mandoc/mdoc_validate.c 2009/03/25 15:17:49 1.1 +++ mandoc/mdoc_validate.c 2009/06/17 11:02:06 1.13 @@ -1,20 +1,18 @@ -/* $Id: mdoc_validate.c,v 1.1 2009/03/25 15:17:49 kristaps Exp $ */ +/* $Id: mdoc_validate.c,v 1.13 2009/06/17 11:02:06 kristaps Exp $ */ /* - * Copyright (c) 2008, 2009 Kristaps Dzonsons + * Copyright (c) 2008, 2009 Kristaps Dzonsons * * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all - * copies. + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL - * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE - * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL - * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR - * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER - * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - * PERFORMANCE OF THIS SOFTWARE. + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include @@ -22,24 +20,18 @@ #include #include #include +#include #include "libmdoc.h" /* FIXME: .Bl -diag can't have non-text children in HEAD. */ /* TODO: ignoring Pp (it's superfluous in some invocations). */ -/* - * Pre- and post-validate macros as they're parsed. Pre-validation - * occurs when the macro has been detected and its arguments parsed. - * Post-validation occurs when all child macros have also been parsed. - * In the ELEMENT case, this is simply the parameters of the macro; in - * the BLOCK case, this is the HEAD, BODY, TAIL and so on. - */ - #define PRE_ARGS struct mdoc *mdoc, const struct mdoc_node *n #define POST_ARGS struct mdoc *mdoc enum merr { + ETOOLONG, EESCAPE, EPRINT, ENODATA, @@ -50,6 +42,7 @@ enum merr { ELISTTYPE, EDISPTYPE, EMULTIDISP, + ESECNAME, EMULTILIST, EARGREP, EBOOL, @@ -57,7 +50,12 @@ enum merr { }; enum mwarn { + WPRINT, + WNOWIDTH, + WMISSWIDTH, WESCAPE, + WDEPESC, + WDEPCOL, WWRONGMSEC, WSECOOO, WSECREP, @@ -82,8 +80,6 @@ struct valids { v_post *post; }; -/* Utility checks. */ - static int pwarn(struct mdoc *, int, int, enum mwarn); static int perr(struct mdoc *, int, int, enum merr); static int check_parent(PRE_ARGS, int, enum mdoc_type); @@ -102,10 +98,11 @@ static int err_child_gt(struct mdoc *, const char *, i static int warn_child_gt(struct mdoc *, const char *, int); static int err_child_eq(struct mdoc *, const char *, int); static int warn_child_eq(struct mdoc *, const char *, int); -static inline int count_child(struct mdoc *); -static inline int warn_count(struct mdoc *, const char *, +static int count_child(struct mdoc *); +static int warn_print(struct mdoc *, int, int); +static int warn_count(struct mdoc *, const char *, int, const char *, int); -static inline int err_count(struct mdoc *, const char *, +static int err_count(struct mdoc *, const char *, int, const char *, int); static int pre_an(PRE_ARGS); static int pre_bd(PRE_ARGS); @@ -120,7 +117,6 @@ static int pre_fd(PRE_ARGS); static int pre_it(PRE_ARGS); static int pre_lb(PRE_ARGS); static int pre_os(PRE_ARGS); -static int pre_prologue(PRE_ARGS); static int pre_rv(PRE_ARGS); static int pre_sh(PRE_ARGS); static int pre_ss(PRE_ARGS); @@ -137,7 +133,6 @@ static int bwarn_ge1(POST_ARGS); static int hwarn_eq1(POST_ARGS); static int ewarn_ge1(POST_ARGS); static int ebool(POST_ARGS); - static int post_an(POST_ARGS); static int post_args(POST_ARGS); static int post_at(POST_ARGS); @@ -151,8 +146,8 @@ static int post_sh_body(POST_ARGS); static int post_sh_head(POST_ARGS); static int post_st(POST_ARGS); -#define mwarn(m, t) nwarn((m), (m)->last, (t)) -#define merr(m, t) nerr((m), (m)->last, (t)) +#define vwarn(m, t) nwarn((m), (m)->last, (t)) +#define verr(m, t) nerr((m), (m)->last, (t)) #define nwarn(m, n, t) pwarn((m), (n)->line, (n)->pos, (t)) #define nerr(m, n, t) perr((m), (n)->line, (n)->pos, (t)) @@ -160,15 +155,15 @@ static v_pre pres_an[] = { pre_an, NULL }; static v_pre pres_bd[] = { pre_display, pre_bd, NULL }; static v_pre pres_bl[] = { pre_bl, NULL }; static v_pre pres_cd[] = { pre_cd, NULL }; -static v_pre pres_dd[] = { pre_prologue, pre_dd, NULL }; +static v_pre pres_dd[] = { pre_dd, NULL }; static v_pre pres_d1[] = { pre_display, NULL }; -static v_pre pres_dt[] = { pre_prologue, pre_dt, NULL }; +static v_pre pres_dt[] = { pre_dt, NULL }; static v_pre pres_er[] = { pre_er, NULL }; static v_pre pres_ex[] = { pre_ex, NULL }; static v_pre pres_fd[] = { pre_fd, NULL }; static v_pre pres_it[] = { pre_it, NULL }; static v_pre pres_lb[] = { pre_lb, NULL }; -static v_pre pres_os[] = { pre_prologue, pre_os, NULL }; +static v_pre pres_os[] = { pre_os, NULL }; static v_pre pres_rv[] = { pre_rv, NULL }; static v_pre pres_sh[] = { pre_sh, NULL }; static v_pre pres_ss[] = { pre_ss, NULL }; @@ -197,7 +192,7 @@ static v_post posts_bf[] = { hwarn_le1, post_bf, NULL static v_post posts_fo[] = { hwarn_eq1, bwarn_ge1, NULL }; const struct valids mdoc_valids[MDOC_MAX] = { - { NULL, NULL }, /* \" */ + { NULL, NULL }, /* Ap */ { pres_dd, posts_text }, /* Dd */ { pres_dt, NULL }, /* Dt */ { pres_os, NULL }, /* Os */ @@ -304,9 +299,8 @@ const struct valids mdoc_valids[MDOC_MAX] = { { NULL, NULL }, /* Fr */ { NULL, posts_notext }, /* Ud */ { pres_lb, posts_lb }, /* Lb */ - { NULL, NULL }, /* Ap */ { NULL, posts_pp }, /* Lp */ - { NULL, posts_text }, /* Lk */ + { NULL, NULL }, /* Lk */ { NULL, posts_text }, /* Mt */ { NULL, posts_wline }, /* Brq */ { NULL, NULL }, /* Bro */ @@ -319,6 +313,11 @@ const struct valids mdoc_valids[MDOC_MAX] = { }; +#ifdef __linux__ +extern size_t strlcat(char *, const char *, size_t); +#endif + + int mdoc_valid_pre(struct mdoc *mdoc, const struct mdoc_node *n) @@ -384,6 +383,9 @@ perr(struct mdoc *m, int line, int pos, enum merr type p = NULL; switch (type) { + case (ETOOLONG): + p = "text argument too long"; + break; case (EESCAPE): p = "invalid escape sequence"; break; @@ -411,6 +413,9 @@ perr(struct mdoc *m, int line, int pos, enum merr type case (EDISPTYPE): p = "missing display type"; break; + case (ESECNAME): + p = "the NAME section must come first"; + break; case (ELINE): p = "expected line arguments"; break; @@ -461,9 +466,25 @@ pwarn(struct mdoc *m, int line, int pos, enum mwarn ty p = "prologue macros out-of-order"; c = WARN_COMPAT; break; + case (WDEPCOL): + p = "deprecated column argument syntax"; + c = WARN_COMPAT; + break; + case (WNOWIDTH): + p = "superfluous width argument"; + break; + case (WMISSWIDTH): + p = "missing width argument"; + break; + case (WPRINT): + p = "invalid character"; + break; case (WESCAPE): p = "invalid escape sequence"; break; + case (WDEPESC): + p = "deprecated special-character escape"; + break; case (WNOLINE): p = "suggested no line arguments"; break; @@ -498,7 +519,15 @@ pwarn(struct mdoc *m, int line, int pos, enum mwarn ty } +static int +warn_print(struct mdoc *m, int ln, int pos) +{ + if (MDOC_IGN_CHARS & m->pflags) + return(pwarn(m, ln, pos, WPRINT)); + return(perr(m, ln, pos, EPRINT)); +} + static inline int warn_count(struct mdoc *m, const char *k, int want, const char *v, int has) @@ -695,20 +724,24 @@ check_text(struct mdoc *mdoc, int line, int pos, const { size_t c; - /* FIXME: indicate deprecated escapes \*(xx and \*x. */ - for ( ; *p; p++) { if ('\t' == *p) { if ( ! (MDOC_LITERAL & mdoc->flags)) - return(perr(mdoc, line, pos, EPRINT)); + if ( ! warn_print(mdoc, line, pos)) + return(0); } else if ( ! isprint((u_char)*p)) - return(perr(mdoc, line, pos, EPRINT)); + if ( ! warn_print(mdoc, line, pos)) + return(0); if ('\\' != *p) continue; c = mdoc_isescape(p); if (c) { + /* See if form is deprecated. */ + if ('*' == p[1]) + if ( ! pwarn(mdoc, line, pos, WDEPESC)) + return(0); p += (int)c - 1; continue; } @@ -764,7 +797,7 @@ pre_display(PRE_ARGS) static int pre_bl(PRE_ARGS) { - int i, type, width, offset; + int pos, col, type, width, offset; if (MDOC_BLOCK != n->type) return(1); @@ -773,11 +806,11 @@ pre_bl(PRE_ARGS) /* Make sure that only one type of list is specified. */ - type = offset = width = -1; + type = offset = width = col = -1; /* LINTED */ - for (i = 0; i < (int)n->args->argc; i++) - switch (n->args->argv[i].arg) { + for (pos = 0; pos < (int)n->args->argc; pos++) + switch (n->args->argv[pos].arg) { case (MDOC_Bullet): /* FALLTHROUGH */ case (MDOC_Dash): @@ -799,23 +832,21 @@ pre_bl(PRE_ARGS) case (MDOC_Inset): /* FALLTHROUGH */ case (MDOC_Column): - if (-1 == type) { - type = n->args->argv[i].arg; - break; - } - return(nerr(mdoc, n, EMULTILIST)); + if (-1 != type) + return(nerr(mdoc, n, EMULTILIST)); + type = n->args->argv[pos].arg; + col = pos; + break; case (MDOC_Width): - if (-1 == width) { - width = n->args->argv[i].arg; - break; - } - return(nerr(mdoc, n, EARGREP)); + if (-1 != width) + return(nerr(mdoc, n, EARGREP)); + width = n->args->argv[pos].arg; + break; case (MDOC_Offset): - if (-1 == offset) { - offset = n->args->argv[i].arg; - break; - } - return(nerr(mdoc, n, EARGREP)); + if (-1 != offset) + return(nerr(mdoc, n, EARGREP)); + offset = n->args->argv[pos].arg; + break; default: break; } @@ -823,7 +854,17 @@ pre_bl(PRE_ARGS) if (-1 == type) return(nerr(mdoc, n, ELISTTYPE)); + /* + * Validate the width field. Some list types don't need width + * types and should be warned about them. Others should have it + * and must also be warned. + */ + switch (type) { + case (MDOC_Tag): + if (-1 == width && ! nwarn(mdoc, n, WMISSWIDTH)) + return(0); + break; case (MDOC_Column): /* FALLTHROUGH */ case (MDOC_Diag): @@ -831,17 +872,25 @@ pre_bl(PRE_ARGS) case (MDOC_Inset): /* FALLTHROUGH */ case (MDOC_Item): - if (-1 == width) + if (-1 != width && ! nwarn(mdoc, n, WNOWIDTH)) + return(0); + break; + default: + break; + } + + /* + * General validation of fields. + */ + + switch (type) { + case (MDOC_Column): + assert(col >= 0); + if (0 == n->args->argv[col].sz) break; - return(mdoc_nwarn(mdoc, n, WARN_SYNTAX, - "superfluous %s argument", - mdoc_argnames[MDOC_Width])); - case (MDOC_Tag): - if (-1 != width) - break; - return(mdoc_nwarn(mdoc, n, WARN_SYNTAX, - "suggest %s argument", - mdoc_argnames[MDOC_Width])); + if ( ! nwarn(mdoc, n, WDEPCOL)) + return(0); + break; default: break; } @@ -960,7 +1009,7 @@ static int pre_er(PRE_ARGS) { - return(check_msec(mdoc, n, 2, 0)); + return(check_msec(mdoc, n, 2, 3, 9, 0)); } @@ -973,14 +1022,6 @@ pre_cd(PRE_ARGS) static int -pre_prologue(PRE_ARGS) -{ - - return(check_sec(mdoc, n, SEC_PROLOGUE, SEC_CUSTOM)); -} - - -static int pre_dt(PRE_ARGS) { @@ -1033,25 +1074,24 @@ post_bf(POST_ARGS) head = mdoc->last->head; - if (NULL == mdoc->last->args) { - if (NULL == head->child || - MDOC_TEXT != head->child->type) - return(mdoc_err(mdoc, "text argument expected")); + if (mdoc->last->args && head->child) + return(mdoc_err(mdoc, "one argument expected")); + else if (mdoc->last->args) + return(1); - p = head->child->string; - if (xstrcmp(p, "Em")) - return(1); - else if (xstrcmp(p, "Li")) - return(1); - else if (xstrcmp(p, "Sm")) - return(1); - return(mdoc_nerr(mdoc, head->child, "invalid font")); - } + if (NULL == head->child || MDOC_TEXT != head->child->type) + return(mdoc_err(mdoc, "text argument expected")); - if (head->child) - return(mdoc_err(mdoc, "one argument expected")); + p = head->child->string; - return(1); + if (0 == strcmp(p, "Em")) + return(1); + else if (0 == strcmp(p, "Li")) + return(1); + else if (0 == strcmp(p, "Sm")) + return(1); + + return(mdoc_nerr(mdoc, head->child, "invalid font mode")); } @@ -1063,7 +1103,7 @@ post_nm(POST_ARGS) return(1); if (mdoc->meta.name) return(1); - return(merr(mdoc, ENAME)); + return(verr(mdoc, ENAME)); } @@ -1074,10 +1114,10 @@ post_at(POST_ARGS) if (NULL == mdoc->last->child) return(1); if (MDOC_TEXT != mdoc->last->child->type) - return(merr(mdoc, EATT)); + return(verr(mdoc, EATT)); if (mdoc_a2att(mdoc->last->child->string)) return(1); - return(merr(mdoc, EATT)); + return(verr(mdoc, EATT)); } @@ -1088,12 +1128,12 @@ post_an(POST_ARGS) if (mdoc->last->args) { if (NULL == mdoc->last->child) return(1); - return(merr(mdoc, ELINE)); + return(verr(mdoc, ELINE)); } if (mdoc->last->child) return(1); - return(merr(mdoc, ELINE)); + return(verr(mdoc, ELINE)); } @@ -1103,7 +1143,7 @@ post_args(POST_ARGS) if (mdoc->last->args) return(1); - return(merr(mdoc, ELINE)); + return(verr(mdoc, ELINE)); } @@ -1118,7 +1158,7 @@ post_it(POST_ARGS) n = mdoc->last->parent->parent; if (NULL == n->args) - return(merr(mdoc, ELISTTYPE)); + return(verr(mdoc, ELISTTYPE)); /* Some types require block-head, some not. */ @@ -1156,12 +1196,12 @@ post_it(POST_ARGS) } if (-1 == type) - return(merr(mdoc, ELISTTYPE)); + return(verr(mdoc, ELISTTYPE)); switch (type) { case (MDOC_Tag): if (NULL == mdoc->last->head->child) - if ( ! mwarn(mdoc, WLINE)) + if ( ! vwarn(mdoc, WLINE)) return(0); break; case (MDOC_Hang): @@ -1172,10 +1212,10 @@ post_it(POST_ARGS) /* FALLTHROUGH */ case (MDOC_Diag): if (NULL == mdoc->last->head->child) - if ( ! mwarn(mdoc, WLINE)) + if ( ! vwarn(mdoc, WLINE)) return(0); if (NULL == mdoc->last->body->child) - if ( ! mwarn(mdoc, WMULTILINE)) + if ( ! vwarn(mdoc, WMULTILINE)) return(0); break; case (MDOC_Bullet): @@ -1188,18 +1228,18 @@ post_it(POST_ARGS) /* FALLTHROUGH */ case (MDOC_Item): if (mdoc->last->head->child) - if ( ! mwarn(mdoc, WNOLINE)) + if ( ! vwarn(mdoc, WNOLINE)) return(0); if (NULL == mdoc->last->body->child) - if ( ! mwarn(mdoc, WMULTILINE)) + if ( ! vwarn(mdoc, WMULTILINE)) return(0); break; case (MDOC_Column): if (NULL == mdoc->last->head->child) - if ( ! mwarn(mdoc, WLINE)) + if ( ! vwarn(mdoc, WLINE)) return(0); if (mdoc->last->body->child) - if ( ! mwarn(mdoc, WNOMULTILINE)) + if ( ! vwarn(mdoc, WNOMULTILINE)) return(0); c = mdoc->last->child; for (i = 0; c && MDOC_HEAD == c->type; c = c->next) @@ -1248,9 +1288,9 @@ ebool(struct mdoc *mdoc) for (n = mdoc->last->child; n; n = n->next) { if (MDOC_TEXT != n->type) break; - if (xstrcmp(n->string, "on")) + if (0 == strcmp(n->string, "on")) continue; - if (xstrcmp(n->string, "off")) + if (0 == strcmp(n->string, "off")) continue; break; } @@ -1266,14 +1306,14 @@ post_root(POST_ARGS) { if (NULL == mdoc->first->child) - return(merr(mdoc, ENODATA)); - if (SEC_PROLOGUE == mdoc->lastnamed) - return(merr(mdoc, ENOPROLOGUE)); + return(verr(mdoc, ENODATA)); + if ( ! (MDOC_PBODY & mdoc->flags)) + return(verr(mdoc, ENOPROLOGUE)); if (MDOC_BLOCK != mdoc->first->child->type) - return(merr(mdoc, ENODATA)); + return(verr(mdoc, ENODATA)); if (MDOC_Sh != mdoc->first->child->tok) - return(merr(mdoc, ENODATA)); + return(verr(mdoc, ENODATA)); return(1); } @@ -1285,7 +1325,7 @@ post_st(POST_ARGS) if (mdoc_a2st(mdoc->last->child->string)) return(1); - return(mwarn(mdoc, WBADSTAND)); + return(vwarn(mdoc, WBADSTAND)); } @@ -1317,28 +1357,29 @@ post_sh_body(POST_ARGS) */ if (NULL == (n = mdoc->last->child)) - return(mwarn(mdoc, WNAMESECINC)); + return(vwarn(mdoc, WNAMESECINC)); for ( ; n && n->next; n = n->next) { if (MDOC_ELEM == n->type && MDOC_Nm == n->tok) continue; if (MDOC_TEXT == n->type) continue; - if ( ! mwarn(mdoc, WNAMESECINC)) + if ( ! vwarn(mdoc, WNAMESECINC)) return(0); } if (MDOC_ELEM == n->type && MDOC_Nd == n->tok) return(1); - return(mwarn(mdoc, WNAMESECINC)); + return(vwarn(mdoc, WNAMESECINC)); } static int post_sh_head(POST_ARGS) { - char buf[64]; - enum mdoc_sec sec; + char buf[64]; + enum mdoc_sec sec; + const struct mdoc_node *n; /* * Process a new section. Sections are either "named" or @@ -1347,27 +1388,40 @@ post_sh_head(POST_ARGS) * certain manual sections. */ - assert(MDOC_Sh == mdoc->last->tok); + buf[0] = 0; - (void)xstrlcpys(buf, mdoc->last->child, sizeof(buf)); + for (n = mdoc->last->child; n; n = n->next) { + /* XXX - copied from compact(). */ + assert(MDOC_TEXT == n->type); + if (strlcat(buf, n->string, 64) >= 64) + return(nerr(mdoc, n, ETOOLONG)); + if (NULL == n->next) + continue; + if (strlcat(buf, " ", 64) >= 64) + return(nerr(mdoc, n, ETOOLONG)); + } + sec = mdoc_atosec(buf); - /* The NAME section should always be first. */ + /* + * Check: NAME should always be first, CUSTOM has no roles, + * non-CUSTOM has a conventional order to be followed. + */ - if (SEC_BODY == mdoc->lastnamed && SEC_NAME != sec) - return(mwarn(mdoc, WSECOOO)); + if (SEC_NAME != sec && SEC_NONE == mdoc->lastnamed) + return(verr(mdoc, ESECNAME)); if (SEC_CUSTOM == sec) return(1); - - /* Check for repeated or out-of-order sections. */ - if (sec == mdoc->lastnamed) - return(mwarn(mdoc, WSECREP)); + return(vwarn(mdoc, WSECREP)); if (sec < mdoc->lastnamed) - return(mwarn(mdoc, WSECOOO)); + return(vwarn(mdoc, WSECOOO)); - /* Check particular section/manual section conventions. */ + /* + * Check particular section/manual conventions. LIBRARY can + * only occur in msec 2, 3 (TODO: are there more of these?). + */ switch (sec) { case (SEC_LIBRARY): @@ -1377,7 +1431,7 @@ post_sh_head(POST_ARGS) case (3): break; default: - return(mwarn(mdoc, WWRONGMSEC)); + return(vwarn(mdoc, WWRONGMSEC)); } break; default: