=================================================================== RCS file: /cvs/mandoc/mdoc_validate.c,v retrieving revision 1.1 retrieving revision 1.11 diff -u -p -r1.1 -r1.11 --- mandoc/mdoc_validate.c 2009/03/25 15:17:49 1.1 +++ mandoc/mdoc_validate.c 2009/06/17 09:41:00 1.11 @@ -1,20 +1,18 @@ -/* $Id: mdoc_validate.c,v 1.1 2009/03/25 15:17:49 kristaps Exp $ */ +/* $Id: mdoc_validate.c,v 1.11 2009/06/17 09:41:00 kristaps Exp $ */ /* - * Copyright (c) 2008, 2009 Kristaps Dzonsons + * Copyright (c) 2008, 2009 Kristaps Dzonsons * * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all - * copies. + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL - * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE - * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL - * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR - * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER - * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - * PERFORMANCE OF THIS SOFTWARE. + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include @@ -22,24 +20,18 @@ #include #include #include +#include #include "libmdoc.h" /* FIXME: .Bl -diag can't have non-text children in HEAD. */ /* TODO: ignoring Pp (it's superfluous in some invocations). */ -/* - * Pre- and post-validate macros as they're parsed. Pre-validation - * occurs when the macro has been detected and its arguments parsed. - * Post-validation occurs when all child macros have also been parsed. - * In the ELEMENT case, this is simply the parameters of the macro; in - * the BLOCK case, this is the HEAD, BODY, TAIL and so on. - */ - #define PRE_ARGS struct mdoc *mdoc, const struct mdoc_node *n #define POST_ARGS struct mdoc *mdoc enum merr { + ETOOLONG, EESCAPE, EPRINT, ENODATA, @@ -57,7 +49,12 @@ enum merr { }; enum mwarn { + WPRINT, + WNOWIDTH, + WMISSWIDTH, WESCAPE, + WDEPESC, + WDEPCOL, WWRONGMSEC, WSECOOO, WSECREP, @@ -82,8 +79,6 @@ struct valids { v_post *post; }; -/* Utility checks. */ - static int pwarn(struct mdoc *, int, int, enum mwarn); static int perr(struct mdoc *, int, int, enum merr); static int check_parent(PRE_ARGS, int, enum mdoc_type); @@ -102,10 +97,11 @@ static int err_child_gt(struct mdoc *, const char *, i static int warn_child_gt(struct mdoc *, const char *, int); static int err_child_eq(struct mdoc *, const char *, int); static int warn_child_eq(struct mdoc *, const char *, int); -static inline int count_child(struct mdoc *); -static inline int warn_count(struct mdoc *, const char *, +static int count_child(struct mdoc *); +static int warn_print(struct mdoc *, int, int); +static int warn_count(struct mdoc *, const char *, int, const char *, int); -static inline int err_count(struct mdoc *, const char *, +static int err_count(struct mdoc *, const char *, int, const char *, int); static int pre_an(PRE_ARGS); static int pre_bd(PRE_ARGS); @@ -137,7 +133,6 @@ static int bwarn_ge1(POST_ARGS); static int hwarn_eq1(POST_ARGS); static int ewarn_ge1(POST_ARGS); static int ebool(POST_ARGS); - static int post_an(POST_ARGS); static int post_args(POST_ARGS); static int post_at(POST_ARGS); @@ -151,8 +146,8 @@ static int post_sh_body(POST_ARGS); static int post_sh_head(POST_ARGS); static int post_st(POST_ARGS); -#define mwarn(m, t) nwarn((m), (m)->last, (t)) -#define merr(m, t) nerr((m), (m)->last, (t)) +#define vwarn(m, t) nwarn((m), (m)->last, (t)) +#define verr(m, t) nerr((m), (m)->last, (t)) #define nwarn(m, n, t) pwarn((m), (n)->line, (n)->pos, (t)) #define nerr(m, n, t) perr((m), (n)->line, (n)->pos, (t)) @@ -197,7 +192,7 @@ static v_post posts_bf[] = { hwarn_le1, post_bf, NULL static v_post posts_fo[] = { hwarn_eq1, bwarn_ge1, NULL }; const struct valids mdoc_valids[MDOC_MAX] = { - { NULL, NULL }, /* \" */ + { NULL, NULL }, /* Ap */ { pres_dd, posts_text }, /* Dd */ { pres_dt, NULL }, /* Dt */ { pres_os, NULL }, /* Os */ @@ -304,9 +299,8 @@ const struct valids mdoc_valids[MDOC_MAX] = { { NULL, NULL }, /* Fr */ { NULL, posts_notext }, /* Ud */ { pres_lb, posts_lb }, /* Lb */ - { NULL, NULL }, /* Ap */ { NULL, posts_pp }, /* Lp */ - { NULL, posts_text }, /* Lk */ + { NULL, NULL }, /* Lk */ { NULL, posts_text }, /* Mt */ { NULL, posts_wline }, /* Brq */ { NULL, NULL }, /* Bro */ @@ -319,6 +313,11 @@ const struct valids mdoc_valids[MDOC_MAX] = { }; +#ifdef __linux__ +extern size_t strlcat(char *, const char *, size_t); +#endif + + int mdoc_valid_pre(struct mdoc *mdoc, const struct mdoc_node *n) @@ -384,6 +383,9 @@ perr(struct mdoc *m, int line, int pos, enum merr type p = NULL; switch (type) { + case (ETOOLONG): + p = "text argument too long"; + break; case (EESCAPE): p = "invalid escape sequence"; break; @@ -461,9 +463,25 @@ pwarn(struct mdoc *m, int line, int pos, enum mwarn ty p = "prologue macros out-of-order"; c = WARN_COMPAT; break; + case (WDEPCOL): + p = "deprecated column argument syntax"; + c = WARN_COMPAT; + break; + case (WNOWIDTH): + p = "superfluous width argument"; + break; + case (WMISSWIDTH): + p = "missing width argument"; + break; + case (WPRINT): + p = "invalid character"; + break; case (WESCAPE): p = "invalid escape sequence"; break; + case (WDEPESC): + p = "deprecated special-character escape"; + break; case (WNOLINE): p = "suggested no line arguments"; break; @@ -498,7 +516,15 @@ pwarn(struct mdoc *m, int line, int pos, enum mwarn ty } +static int +warn_print(struct mdoc *m, int ln, int pos) +{ + if (MDOC_IGN_CHARS & m->pflags) + return(pwarn(m, ln, pos, WPRINT)); + return(perr(m, ln, pos, EPRINT)); +} + static inline int warn_count(struct mdoc *m, const char *k, int want, const char *v, int has) @@ -695,20 +721,24 @@ check_text(struct mdoc *mdoc, int line, int pos, const { size_t c; - /* FIXME: indicate deprecated escapes \*(xx and \*x. */ - for ( ; *p; p++) { if ('\t' == *p) { if ( ! (MDOC_LITERAL & mdoc->flags)) - return(perr(mdoc, line, pos, EPRINT)); + if ( ! warn_print(mdoc, line, pos)) + return(0); } else if ( ! isprint((u_char)*p)) - return(perr(mdoc, line, pos, EPRINT)); + if ( ! warn_print(mdoc, line, pos)) + return(0); if ('\\' != *p) continue; c = mdoc_isescape(p); if (c) { + /* See if form is deprecated. */ + if ('*' == p[1]) + if ( ! pwarn(mdoc, line, pos, WDEPESC)) + return(0); p += (int)c - 1; continue; } @@ -764,7 +794,7 @@ pre_display(PRE_ARGS) static int pre_bl(PRE_ARGS) { - int i, type, width, offset; + int pos, col, type, width, offset; if (MDOC_BLOCK != n->type) return(1); @@ -773,11 +803,11 @@ pre_bl(PRE_ARGS) /* Make sure that only one type of list is specified. */ - type = offset = width = -1; + type = offset = width = col = -1; /* LINTED */ - for (i = 0; i < (int)n->args->argc; i++) - switch (n->args->argv[i].arg) { + for (pos = 0; pos < (int)n->args->argc; pos++) + switch (n->args->argv[pos].arg) { case (MDOC_Bullet): /* FALLTHROUGH */ case (MDOC_Dash): @@ -799,23 +829,21 @@ pre_bl(PRE_ARGS) case (MDOC_Inset): /* FALLTHROUGH */ case (MDOC_Column): - if (-1 == type) { - type = n->args->argv[i].arg; - break; - } - return(nerr(mdoc, n, EMULTILIST)); + if (-1 != type) + return(nerr(mdoc, n, EMULTILIST)); + type = n->args->argv[pos].arg; + col = pos; + break; case (MDOC_Width): - if (-1 == width) { - width = n->args->argv[i].arg; - break; - } - return(nerr(mdoc, n, EARGREP)); + if (-1 != width) + return(nerr(mdoc, n, EARGREP)); + width = n->args->argv[pos].arg; + break; case (MDOC_Offset): - if (-1 == offset) { - offset = n->args->argv[i].arg; - break; - } - return(nerr(mdoc, n, EARGREP)); + if (-1 != offset) + return(nerr(mdoc, n, EARGREP)); + offset = n->args->argv[pos].arg; + break; default: break; } @@ -823,7 +851,17 @@ pre_bl(PRE_ARGS) if (-1 == type) return(nerr(mdoc, n, ELISTTYPE)); + /* + * Validate the width field. Some list types don't need width + * types and should be warned about them. Others should have it + * and must also be warned. + */ + switch (type) { + case (MDOC_Tag): + if (-1 == width && ! nwarn(mdoc, n, WMISSWIDTH)) + return(0); + break; case (MDOC_Column): /* FALLTHROUGH */ case (MDOC_Diag): @@ -831,17 +869,25 @@ pre_bl(PRE_ARGS) case (MDOC_Inset): /* FALLTHROUGH */ case (MDOC_Item): - if (-1 == width) + if (-1 != width && ! nwarn(mdoc, n, WNOWIDTH)) + return(0); + break; + default: + break; + } + + /* + * General validation of fields. + */ + + switch (type) { + case (MDOC_Column): + assert(col >= 0); + if (0 == n->args->argv[col].sz) break; - return(mdoc_nwarn(mdoc, n, WARN_SYNTAX, - "superfluous %s argument", - mdoc_argnames[MDOC_Width])); - case (MDOC_Tag): - if (-1 != width) - break; - return(mdoc_nwarn(mdoc, n, WARN_SYNTAX, - "suggest %s argument", - mdoc_argnames[MDOC_Width])); + if ( ! nwarn(mdoc, n, WDEPCOL)) + return(0); + break; default: break; } @@ -960,7 +1006,7 @@ static int pre_er(PRE_ARGS) { - return(check_msec(mdoc, n, 2, 0)); + return(check_msec(mdoc, n, 2, 3, 9, 0)); } @@ -1039,11 +1085,11 @@ post_bf(POST_ARGS) return(mdoc_err(mdoc, "text argument expected")); p = head->child->string; - if (xstrcmp(p, "Em")) + if (0 == strcmp(p, "Em")) return(1); - else if (xstrcmp(p, "Li")) + else if (0 == strcmp(p, "Li")) return(1); - else if (xstrcmp(p, "Sm")) + else if (0 == strcmp(p, "Sm")) return(1); return(mdoc_nerr(mdoc, head->child, "invalid font")); } @@ -1063,7 +1109,7 @@ post_nm(POST_ARGS) return(1); if (mdoc->meta.name) return(1); - return(merr(mdoc, ENAME)); + return(verr(mdoc, ENAME)); } @@ -1074,10 +1120,10 @@ post_at(POST_ARGS) if (NULL == mdoc->last->child) return(1); if (MDOC_TEXT != mdoc->last->child->type) - return(merr(mdoc, EATT)); + return(verr(mdoc, EATT)); if (mdoc_a2att(mdoc->last->child->string)) return(1); - return(merr(mdoc, EATT)); + return(verr(mdoc, EATT)); } @@ -1088,12 +1134,12 @@ post_an(POST_ARGS) if (mdoc->last->args) { if (NULL == mdoc->last->child) return(1); - return(merr(mdoc, ELINE)); + return(verr(mdoc, ELINE)); } if (mdoc->last->child) return(1); - return(merr(mdoc, ELINE)); + return(verr(mdoc, ELINE)); } @@ -1103,7 +1149,7 @@ post_args(POST_ARGS) if (mdoc->last->args) return(1); - return(merr(mdoc, ELINE)); + return(verr(mdoc, ELINE)); } @@ -1118,7 +1164,7 @@ post_it(POST_ARGS) n = mdoc->last->parent->parent; if (NULL == n->args) - return(merr(mdoc, ELISTTYPE)); + return(verr(mdoc, ELISTTYPE)); /* Some types require block-head, some not. */ @@ -1156,12 +1202,12 @@ post_it(POST_ARGS) } if (-1 == type) - return(merr(mdoc, ELISTTYPE)); + return(verr(mdoc, ELISTTYPE)); switch (type) { case (MDOC_Tag): if (NULL == mdoc->last->head->child) - if ( ! mwarn(mdoc, WLINE)) + if ( ! vwarn(mdoc, WLINE)) return(0); break; case (MDOC_Hang): @@ -1172,10 +1218,10 @@ post_it(POST_ARGS) /* FALLTHROUGH */ case (MDOC_Diag): if (NULL == mdoc->last->head->child) - if ( ! mwarn(mdoc, WLINE)) + if ( ! vwarn(mdoc, WLINE)) return(0); if (NULL == mdoc->last->body->child) - if ( ! mwarn(mdoc, WMULTILINE)) + if ( ! vwarn(mdoc, WMULTILINE)) return(0); break; case (MDOC_Bullet): @@ -1188,18 +1234,18 @@ post_it(POST_ARGS) /* FALLTHROUGH */ case (MDOC_Item): if (mdoc->last->head->child) - if ( ! mwarn(mdoc, WNOLINE)) + if ( ! vwarn(mdoc, WNOLINE)) return(0); if (NULL == mdoc->last->body->child) - if ( ! mwarn(mdoc, WMULTILINE)) + if ( ! vwarn(mdoc, WMULTILINE)) return(0); break; case (MDOC_Column): if (NULL == mdoc->last->head->child) - if ( ! mwarn(mdoc, WLINE)) + if ( ! vwarn(mdoc, WLINE)) return(0); if (mdoc->last->body->child) - if ( ! mwarn(mdoc, WNOMULTILINE)) + if ( ! vwarn(mdoc, WNOMULTILINE)) return(0); c = mdoc->last->child; for (i = 0; c && MDOC_HEAD == c->type; c = c->next) @@ -1248,9 +1294,9 @@ ebool(struct mdoc *mdoc) for (n = mdoc->last->child; n; n = n->next) { if (MDOC_TEXT != n->type) break; - if (xstrcmp(n->string, "on")) + if (0 == strcmp(n->string, "on")) continue; - if (xstrcmp(n->string, "off")) + if (0 == strcmp(n->string, "off")) continue; break; } @@ -1266,14 +1312,14 @@ post_root(POST_ARGS) { if (NULL == mdoc->first->child) - return(merr(mdoc, ENODATA)); + return(verr(mdoc, ENODATA)); if (SEC_PROLOGUE == mdoc->lastnamed) - return(merr(mdoc, ENOPROLOGUE)); + return(verr(mdoc, ENOPROLOGUE)); if (MDOC_BLOCK != mdoc->first->child->type) - return(merr(mdoc, ENODATA)); + return(verr(mdoc, ENODATA)); if (MDOC_Sh != mdoc->first->child->tok) - return(merr(mdoc, ENODATA)); + return(verr(mdoc, ENODATA)); return(1); } @@ -1285,7 +1331,7 @@ post_st(POST_ARGS) if (mdoc_a2st(mdoc->last->child->string)) return(1); - return(mwarn(mdoc, WBADSTAND)); + return(vwarn(mdoc, WBADSTAND)); } @@ -1317,28 +1363,29 @@ post_sh_body(POST_ARGS) */ if (NULL == (n = mdoc->last->child)) - return(mwarn(mdoc, WNAMESECINC)); + return(vwarn(mdoc, WNAMESECINC)); for ( ; n && n->next; n = n->next) { if (MDOC_ELEM == n->type && MDOC_Nm == n->tok) continue; if (MDOC_TEXT == n->type) continue; - if ( ! mwarn(mdoc, WNAMESECINC)) + if ( ! vwarn(mdoc, WNAMESECINC)) return(0); } if (MDOC_ELEM == n->type && MDOC_Nd == n->tok) return(1); - return(mwarn(mdoc, WNAMESECINC)); + return(vwarn(mdoc, WNAMESECINC)); } static int post_sh_head(POST_ARGS) { - char buf[64]; - enum mdoc_sec sec; + char buf[64]; + enum mdoc_sec sec; + const struct mdoc_node *n; /* * Process a new section. Sections are either "named" or @@ -1349,23 +1396,34 @@ post_sh_head(POST_ARGS) assert(MDOC_Sh == mdoc->last->tok); - (void)xstrlcpys(buf, mdoc->last->child, sizeof(buf)); + /* This is just concat() inlined, which is irritating. */ + buf[0] = 0; + for (n = mdoc->last->child; n; n = n->next) { + assert(MDOC_TEXT == n->type); + if (strlcat(buf, n->string, 64) >= 64) + return(nerr(mdoc, n, ETOOLONG)); + if (NULL == n->next) + continue; + if (strlcat(buf, " ", 64) >= 64) + return(nerr(mdoc, n, ETOOLONG)); + } + sec = mdoc_atosec(buf); /* The NAME section should always be first. */ if (SEC_BODY == mdoc->lastnamed && SEC_NAME != sec) - return(mwarn(mdoc, WSECOOO)); + return(vwarn(mdoc, WSECOOO)); if (SEC_CUSTOM == sec) return(1); /* Check for repeated or out-of-order sections. */ if (sec == mdoc->lastnamed) - return(mwarn(mdoc, WSECREP)); + return(vwarn(mdoc, WSECREP)); if (sec < mdoc->lastnamed) - return(mwarn(mdoc, WSECOOO)); + return(vwarn(mdoc, WSECOOO)); /* Check particular section/manual section conventions. */ @@ -1377,7 +1435,7 @@ post_sh_head(POST_ARGS) case (3): break; default: - return(mwarn(mdoc, WWRONGMSEC)); + return(vwarn(mdoc, WWRONGMSEC)); } break; default: