=================================================================== RCS file: /cvs/mandoc/mdoc_validate.c,v retrieving revision 1.158 retrieving revision 1.197 diff -u -p -r1.158 -r1.197 --- mandoc/mdoc_validate.c 2011/03/07 01:35:51 1.158 +++ mandoc/mdoc_validate.c 2013/10/21 23:47:58 1.197 @@ -1,7 +1,7 @@ -/* $Id: mdoc_validate.c,v 1.158 2011/03/07 01:35:51 schwarze Exp $ */ +/* $Id: mdoc_validate.c,v 1.197 2013/10/21 23:47:58 schwarze Exp $ */ /* - * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons - * Copyright (c) 2010, 2011 Ingo Schwarze + * Copyright (c) 2008-2012 Kristaps Dzonsons + * Copyright (c) 2010, 2011, 2012, 2013 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -19,7 +19,7 @@ #include "config.h" #endif -#ifndef OSNAME +#ifndef OSNAME #include #endif @@ -33,6 +33,7 @@ #include #include +#include "mdoc.h" #include "mandoc.h" #include "libmdoc.h" #include "libmandoc.h" @@ -71,10 +72,10 @@ static void check_text(struct mdoc *, int, int, char static void check_argv(struct mdoc *, struct mdoc_node *, struct mdoc_argv *); static void check_args(struct mdoc *, struct mdoc_node *); +static int concat(char *, const struct mdoc_node *, size_t); +static enum mdoc_sec a2sec(const char *); +static size_t macro2len(enum mdoct); -static int concat(struct mdoc *, char *, - const struct mdoc_node *, size_t); - static int ebool(POST_ARGS); static int berr_ge1(POST_ARGS); static int bwarn_ge1(POST_ARGS); @@ -96,17 +97,19 @@ static int post_bl_block_width(POST_ARGS); static int post_bl_block_tag(POST_ARGS); static int post_bl_head(POST_ARGS); static int post_bx(POST_ARGS); +static int post_defaults(POST_ARGS); static int post_dd(POST_ARGS); static int post_dt(POST_ARGS); -static int post_defaults(POST_ARGS); -static int post_literal(POST_ARGS); static int post_eoln(POST_ARGS); +static int post_hyph(POST_ARGS); +static int post_ignpar(POST_ARGS); static int post_it(POST_ARGS); static int post_lb(POST_ARGS); +static int post_literal(POST_ARGS); static int post_nm(POST_ARGS); static int post_ns(POST_ARGS); static int post_os(POST_ARGS); -static int post_ignpar(POST_ARGS); +static int post_par(POST_ARGS); static int post_prol(POST_ARGS); static int post_root(POST_ARGS); static int post_rs(POST_ARGS); @@ -140,27 +143,30 @@ static v_post posts_bx[] = { post_bx, NULL }; static v_post posts_bool[] = { ebool, NULL }; static v_post posts_eoln[] = { post_eoln, NULL }; static v_post posts_defaults[] = { post_defaults, NULL }; +static v_post posts_d1[] = { bwarn_ge1, post_hyph, NULL }; static v_post posts_dd[] = { post_dd, post_prol, NULL }; static v_post posts_dl[] = { post_literal, bwarn_ge1, NULL }; static v_post posts_dt[] = { post_dt, post_prol, NULL }; static v_post posts_fo[] = { hwarn_eq1, bwarn_ge1, NULL }; +static v_post posts_hyph[] = { post_hyph, NULL }; +static v_post posts_hyphtext[] = { ewarn_ge1, post_hyph, NULL }; static v_post posts_it[] = { post_it, NULL }; static v_post posts_lb[] = { post_lb, NULL }; -static v_post posts_nd[] = { berr_ge1, NULL }; +static v_post posts_nd[] = { berr_ge1, post_hyph, NULL }; static v_post posts_nm[] = { post_nm, NULL }; static v_post posts_notext[] = { ewarn_eq0, NULL }; static v_post posts_ns[] = { post_ns, NULL }; static v_post posts_os[] = { post_os, post_prol, NULL }; +static v_post posts_pp[] = { post_par, ewarn_eq0, NULL }; static v_post posts_rs[] = { post_rs, NULL }; -static v_post posts_sh[] = { post_ignpar, hwarn_ge1, bwarn_ge1, post_sh, NULL }; -static v_post posts_sp[] = { ewarn_le1, NULL }; -static v_post posts_ss[] = { post_ignpar, hwarn_ge1, bwarn_ge1, NULL }; +static v_post posts_sh[] = { post_ignpar,hwarn_ge1,post_sh,post_hyph,NULL }; +static v_post posts_sp[] = { post_par, ewarn_le1, NULL }; +static v_post posts_ss[] = { post_ignpar, hwarn_ge1, post_hyph, NULL }; static v_post posts_st[] = { post_st, NULL }; static v_post posts_std[] = { post_std, NULL }; static v_post posts_text[] = { ewarn_ge1, NULL }; static v_post posts_text1[] = { ewarn_eq1, NULL }; static v_post posts_vt[] = { post_vt, NULL }; -static v_post posts_wline[] = { bwarn_ge1, NULL }; static v_pre pres_an[] = { pre_an, NULL }; static v_pre pres_bd[] = { pre_display, pre_bd, pre_literal, pre_par, NULL }; static v_pre pres_bl[] = { pre_bl, pre_par, NULL }; @@ -168,8 +174,6 @@ static v_pre pres_d1[] = { pre_display, NULL }; static v_pre pres_dl[] = { pre_literal, pre_display, NULL }; static v_pre pres_dd[] = { pre_dd, NULL }; static v_pre pres_dt[] = { pre_dt, NULL }; -static v_pre pres_er[] = { NULL, NULL }; -static v_pre pres_fd[] = { NULL, NULL }; static v_pre pres_it[] = { pre_it, pre_par, NULL }; static v_pre pres_os[] = { pre_os, NULL }; static v_pre pres_pp[] = { pre_par, NULL }; @@ -177,15 +181,15 @@ static v_pre pres_sh[] = { pre_sh, NULL }; static v_pre pres_ss[] = { pre_ss, NULL }; static v_pre pres_std[] = { pre_std, NULL }; -const struct valids mdoc_valids[MDOC_MAX] = { +static const struct valids mdoc_valids[MDOC_MAX] = { { NULL, NULL }, /* Ap */ { pres_dd, posts_dd }, /* Dd */ { pres_dt, posts_dt }, /* Dt */ { pres_os, posts_os }, /* Os */ { pres_sh, posts_sh }, /* Sh */ { pres_ss, posts_ss }, /* Ss */ - { pres_pp, posts_notext }, /* Pp */ - { pres_d1, posts_wline }, /* D1 */ + { pres_pp, posts_pp }, /* Pp */ + { pres_d1, posts_d1 }, /* D1 */ { pres_dl, posts_dl }, /* Dl */ { pres_bd, posts_bd }, /* Bd */ { NULL, NULL }, /* Ed */ @@ -198,11 +202,11 @@ const struct valids mdoc_valids[MDOC_MAX] = { { NULL, NULL }, /* Cd */ { NULL, NULL }, /* Cm */ { NULL, NULL }, /* Dv */ - { pres_er, NULL }, /* Er */ + { NULL, NULL }, /* Er */ { NULL, NULL }, /* Ev */ { pres_std, posts_std }, /* Ex */ { NULL, NULL }, /* Fa */ - { pres_fd, posts_text }, /* Fd */ + { NULL, posts_text }, /* Fd */ { NULL, NULL }, /* Fl */ { NULL, NULL }, /* Fn */ { NULL, NULL }, /* Ft */ @@ -220,15 +224,15 @@ const struct valids mdoc_valids[MDOC_MAX] = { { NULL, posts_vt }, /* Vt */ { NULL, posts_text }, /* Xr */ { NULL, posts_text }, /* %A */ - { NULL, posts_text }, /* %B */ /* FIXME: can be used outside Rs/Re. */ + { NULL, posts_hyphtext }, /* %B */ /* FIXME: can be used outside Rs/Re. */ { NULL, posts_text }, /* %D */ { NULL, posts_text }, /* %I */ { NULL, posts_text }, /* %J */ - { NULL, posts_text }, /* %N */ - { NULL, posts_text }, /* %O */ + { NULL, posts_hyphtext }, /* %N */ + { NULL, posts_hyphtext }, /* %O */ { NULL, posts_text }, /* %P */ - { NULL, posts_text }, /* %R */ - { NULL, posts_text }, /* %T */ /* FIXME: can be used outside Rs/Re. */ + { NULL, posts_hyphtext }, /* %R */ + { NULL, posts_hyphtext }, /* %T */ /* FIXME: can be used outside Rs/Re. */ { NULL, posts_text }, /* %V */ { NULL, NULL }, /* Ac */ { NULL, NULL }, /* Ao */ @@ -268,7 +272,7 @@ const struct valids mdoc_valids[MDOC_MAX] = { { NULL, NULL }, /* So */ { NULL, NULL }, /* Sq */ { NULL, posts_bool }, /* Sm */ - { NULL, NULL }, /* Sx */ + { NULL, posts_hyph }, /* Sx */ { NULL, NULL }, /* Sy */ { NULL, NULL }, /* Tn */ { NULL, NULL }, /* Ux */ @@ -285,7 +289,7 @@ const struct valids mdoc_valids[MDOC_MAX] = { { NULL, NULL }, /* Fr */ { NULL, posts_eoln }, /* Ud */ { NULL, posts_lb }, /* Lb */ - { NULL, posts_notext }, /* Lp */ + { pres_pp, posts_pp }, /* Lp */ { NULL, NULL }, /* Lk */ { NULL, posts_defaults }, /* Mt */ { NULL, NULL }, /* Brq */ @@ -296,8 +300,8 @@ const struct valids mdoc_valids[MDOC_MAX] = { { NULL, NULL }, /* En */ { NULL, NULL }, /* Dx */ { NULL, posts_text }, /* %Q */ - { NULL, posts_notext }, /* br */ - { pres_pp, posts_sp }, /* sp */ + { NULL, posts_pp }, /* br */ + { NULL, posts_sp }, /* sp */ { NULL, posts_text1 }, /* %U */ { NULL, NULL }, /* Ta */ }; @@ -313,14 +317,38 @@ static const enum mdoct rsord[RSORD_MAX] = { MDOC__R, MDOC__N, MDOC__V, + MDOC__U, MDOC__P, MDOC__Q, - MDOC__D, - MDOC__O, MDOC__C, - MDOC__U + MDOC__D, + MDOC__O }; +static const char * const secnames[SEC__MAX] = { + NULL, + "NAME", + "LIBRARY", + "SYNOPSIS", + "DESCRIPTION", + "IMPLEMENTATION NOTES", + "RETURN VALUES", + "ENVIRONMENT", + "FILES", + "EXIT STATUS", + "EXAMPLES", + "DIAGNOSTICS", + "COMPATIBILITY", + "ERRORS", + "SEE ALSO", + "STANDARDS", + "HISTORY", + "AUTHORS", + "CAVEATS", + "BUGS", + "SECURITY CONSIDERATIONS", + NULL +}; int mdoc_valid_pre(struct mdoc *mdoc, struct mdoc_node *n) @@ -389,29 +417,29 @@ mdoc_valid_post(struct mdoc *mdoc) } static int -check_count(struct mdoc *m, enum mdoc_type type, +check_count(struct mdoc *mdoc, enum mdoc_type type, enum check_lvl lvl, enum check_ineq ineq, int val) { const char *p; enum mandocerr t; - if (m->last->type != type) + if (mdoc->last->type != type) return(1); switch (ineq) { case (CHECK_LT): p = "less than "; - if (m->last->nchild < val) + if (mdoc->last->nchild < val) return(1); break; case (CHECK_GT): p = "more than "; - if (m->last->nchild > val) + if (mdoc->last->nchild > val) return(1); break; case (CHECK_EQ): p = ""; - if (val == m->last->nchild) + if (val == mdoc->last->nchild) return(1); break; default: @@ -420,10 +448,10 @@ check_count(struct mdoc *m, enum mdoc_type type, } t = lvl == CHECK_WARN ? MANDOCERR_ARGCWARN : MANDOCERR_ARGCOUNT; - - return(mdoc_vmsg(m, t, m->last->line, m->last->pos, + mandoc_vmsg(t, mdoc->parse, mdoc->last->line, mdoc->last->pos, "want %s%d children (have %d)", - p, val, m->last->nchild)); + p, val, mdoc->last->nchild); + return(1); } static int @@ -488,7 +516,7 @@ hwarn_le1(POST_ARGS) } static void -check_args(struct mdoc *m, struct mdoc_node *n) +check_args(struct mdoc *mdoc, struct mdoc_node *n) { int i; @@ -497,53 +525,34 @@ check_args(struct mdoc *m, struct mdoc_node *n) assert(n->args->argc); for (i = 0; i < (int)n->args->argc; i++) - check_argv(m, n, &n->args->argv[i]); + check_argv(mdoc, n, &n->args->argv[i]); } static void -check_argv(struct mdoc *m, struct mdoc_node *n, struct mdoc_argv *v) +check_argv(struct mdoc *mdoc, struct mdoc_node *n, struct mdoc_argv *v) { int i; for (i = 0; i < (int)v->sz; i++) - check_text(m, v->line, v->pos, v->value[i]); + check_text(mdoc, v->line, v->pos, v->value[i]); /* FIXME: move to post_std(). */ if (MDOC_Std == v->arg) - if ( ! (v->sz || m->meta.name)) - mdoc_nmsg(m, n, MANDOCERR_NONAME); + if ( ! (v->sz || mdoc->meta.name)) + mdoc_nmsg(mdoc, n, MANDOCERR_NONAME); } static void -check_text(struct mdoc *m, int ln, int pos, char *p) +check_text(struct mdoc *mdoc, int ln, int pos, char *p) { - int c; - size_t sz; + char *cp; - for ( ; *p; p++, pos++) { - sz = strcspn(p, "\t\\"); - p += (int)sz; + if (MDOC_LITERAL & mdoc->flags) + return; - if ('\0' == *p) - break; - - pos += (int)sz; - - if ('\t' == *p) { - if ( ! (MDOC_LITERAL & m->flags)) - mdoc_pmsg(m, ln, pos, MANDOCERR_BADTAB); - continue; - } - - if (0 == (c = mandoc_special(p))) { - mdoc_pmsg(m, ln, pos, MANDOCERR_BADESCAPE); - continue; - } - - p += c - 1; - pos += c - 1; - } + for (cp = p; NULL != (p = strchr(p, '\t')); p++) + mdoc_pmsg(mdoc, ln, pos + (int)(p - cp), MANDOCERR_BADTAB); } static int @@ -555,10 +564,9 @@ check_parent(PRE_ARGS, enum mdoct tok, enum mdoc_type (t == n->parent->type)) return(1); - mdoc_vmsg(mdoc, MANDOCERR_SYNTCHILD, - n->line, n->pos, "want parent %s", - MDOC_ROOT == t ? "" : - mdoc_macronames[tok]); + mandoc_vmsg(MANDOCERR_SYNTCHILD, mdoc->parse, n->line, + n->pos, "want parent %s", MDOC_ROOT == t ? + "" : mdoc_macronames[tok]); return(0); } @@ -656,8 +664,13 @@ pre_bl(PRE_ARGS) comp = 1; break; case (MDOC_Width): - dup = (NULL != n->norm->Bl.width); - width = n->args->argv[i].value[0]; + /* NB: this can be empty! */ + if (n->args->argv[i].sz) { + width = n->args->argv[i].value[0]; + dup = (NULL != n->norm->Bl.width); + break; + } + mdoc_nmsg(mdoc, n, MANDOCERR_IGNARGV); break; case (MDOC_Offset): /* NB: this can be empty! */ @@ -697,7 +710,7 @@ pre_bl(PRE_ARGS) if (LIST_column == lt) { n->norm->Bl.ncols = n->args->argv[i].sz; - n->norm->Bl.cols = (const char **) + n->norm->Bl.cols = (void *) n->args->argv[i].value; } } @@ -723,14 +736,14 @@ pre_bl(PRE_ARGS) /* * Validate the width field. Some list types don't need width * types and should be warned about them. Others should have it - * and must also be warned. + * and must also be warned. Yet others have a default and need + * no warning. */ switch (n->norm->Bl.type) { case (LIST_tag): - if (n->norm->Bl.width) - break; - mdoc_nmsg(mdoc, n, MANDOCERR_NOWIDTHARG); + if (NULL == n->norm->Bl.width) + mdoc_nmsg(mdoc, n, MANDOCERR_NOWIDTHARG); break; case (LIST_column): /* FALLTHROUGH */ @@ -744,6 +757,18 @@ pre_bl(PRE_ARGS) if (n->norm->Bl.width) mdoc_nmsg(mdoc, n, MANDOCERR_IGNARGV); break; + case (LIST_bullet): + /* FALLTHROUGH */ + case (LIST_dash): + /* FALLTHROUGH */ + case (LIST_hyphen): + if (NULL == n->norm->Bl.width) + n->norm->Bl.width = "2n"; + break; + case (LIST_enum): + if (NULL == n->norm->Bl.width) + n->norm->Bl.width = "3n"; + break; default: break; } @@ -864,8 +889,6 @@ pre_sh(PRE_ARGS) if (MDOC_BLOCK != n->type) return(1); - - mdoc->regs->regs[(int)REG_nS].set = 0; return(check_parent(mdoc, n, MDOC_MAX, MDOC_ROOT)); } @@ -1099,22 +1122,31 @@ static int post_nm(POST_ARGS) { char buf[BUFSIZ]; + int c; - /* If no child specified, make sure we have the meta name. */ - - if (NULL == mdoc->last->child && NULL == mdoc->meta.name) { - mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_NONAME); + if (NULL != mdoc->meta.name) return(1); - } else if (mdoc->meta.name) - return(1); - /* If no meta name, set it from the child. */ + /* Try to use our children for setting the meta name. */ - if ( ! concat(mdoc, buf, mdoc->last->child, BUFSIZ)) - return(0); + if (NULL != mdoc->last->child) { + buf[0] = '\0'; + c = concat(buf, mdoc->last->child, BUFSIZ); + } else + c = 0; - mdoc->meta.name = mandoc_strdup(buf); - + switch (c) { + case (-1): + mdoc_nmsg(mdoc, mdoc->last->child, MANDOCERR_MEM); + return(0); + case (0): + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_NONAME); + mdoc->meta.name = mandoc_strdup("UNKNOWN"); + break; + default: + mdoc->meta.name = mandoc_strdup(buf); + break; + } return(1); } @@ -1240,7 +1272,7 @@ post_an(POST_ARGS) static int post_it(POST_ARGS) { - int i, cols, rc; + int i, cols; enum mdoc_list lt; struct mdoc_node *n, *c; enum mandocerr er; @@ -1306,10 +1338,10 @@ post_it(POST_ARGS) else er = MANDOCERR_SYNTARGCOUNT; - rc = mdoc_vmsg(mdoc, er, - mdoc->last->line, mdoc->last->pos, + mandoc_vmsg(er, mdoc->parse, mdoc->last->line, + mdoc->last->pos, "columns == %d (have %d)", cols, i); - return(rc); + return(MANDOCERR_ARGCOUNT == er); default: break; } @@ -1320,7 +1352,7 @@ post_it(POST_ARGS) static int post_bl_block(POST_ARGS) { - struct mdoc_node *n; + struct mdoc_node *n, *ni, *nc; /* * These are fairly complicated, so we've broken them into two @@ -1336,13 +1368,42 @@ post_bl_block(POST_ARGS) NULL == n->norm->Bl.width) { if ( ! post_bl_block_tag(mdoc)) return(0); + assert(n->norm->Bl.width); } else if (NULL != n->norm->Bl.width) { if ( ! post_bl_block_width(mdoc)) return(0); - } else - return(1); + assert(n->norm->Bl.width); + } - assert(n->norm->Bl.width); + for (ni = n->body->child; ni; ni = ni->next) { + if (NULL == ni->body) + continue; + nc = ni->body->last; + while (NULL != nc) { + switch (nc->tok) { + case (MDOC_Pp): + /* FALLTHROUGH */ + case (MDOC_Lp): + /* FALLTHROUGH */ + case (MDOC_br): + break; + default: + nc = NULL; + continue; + } + if (NULL == ni->next) { + mdoc_nmsg(mdoc, nc, MANDOCERR_MOVEPAR); + if ( ! mdoc_node_relink(mdoc, nc)) + return(0); + } else if (0 == n->norm->Bl.comp && + LIST_column != n->norm->Bl.type) { + mdoc_nmsg(mdoc, nc, MANDOCERR_IGNPAR); + mdoc_node_delete(mdoc, nc); + } else + break; + nc = ni->body->last; + } + } return(1); } @@ -1370,7 +1431,7 @@ post_bl_block_width(POST_ARGS) width = 6; else if (MDOC_MAX == (tok = mdoc_hash_find(n->norm->Bl.width))) return(1); - else if (0 == (width = mdoc_macro2len(tok))) { + else if (0 == (width = macro2len(tok))) { mdoc_nmsg(mdoc, n, MANDOCERR_BADWIDTH); return(1); } @@ -1385,7 +1446,7 @@ post_bl_block_width(POST_ARGS) assert(i < (int)n->args->argc); - snprintf(buf, NUMSIZ, "%zun", width); + snprintf(buf, NUMSIZ, "%un", (unsigned int)width); free(n->args->argv[i].value[0]); n->args->argv[i].value[0] = mandoc_strdup(buf); @@ -1427,7 +1488,7 @@ post_bl_block_tag(POST_ARGS) break; } - if (0 != (ssz = mdoc_macro2len(nn->tok))) + if (0 != (ssz = macro2len(nn->tok))) sz = ssz; break; @@ -1435,7 +1496,7 @@ post_bl_block_tag(POST_ARGS) /* Defaults to ten ens. */ - snprintf(buf, NUMSIZ, "%zun", sz); + snprintf(buf, NUMSIZ, "%un", (unsigned int)sz); /* * We have to dynamically add this to the macro's argument list. @@ -1501,7 +1562,7 @@ post_bl_head(POST_ARGS) assert(0 == np->args->argv[j].sz); /* - * Accomodate for new-style groff column syntax. Shuffle the + * Accommodate for new-style groff column syntax. Shuffle the * child nodes, all of which must be TEXT, as arguments for the * column field. Then, delete the head children. */ @@ -1511,7 +1572,7 @@ post_bl_head(POST_ARGS) ((size_t)mdoc->last->nchild * sizeof(char *)); mdoc->last->norm->Bl.ncols = np->args->argv[j].sz; - mdoc->last->norm->Bl.cols = (const char **)np->args->argv[j].value; + mdoc->last->norm->Bl.cols = (void *)np->args->argv[j].value; for (i = 0, nn = mdoc->last->child; nn; i++) { np->args->argv[j].value[i] = nn->string; @@ -1530,32 +1591,71 @@ post_bl_head(POST_ARGS) static int post_bl(POST_ARGS) { - struct mdoc_node *n; + struct mdoc_node *nparent, *nprev; /* of the Bl block */ + struct mdoc_node *nblock, *nbody; /* of the Bl */ + struct mdoc_node *nchild, *nnext; /* of the Bl body */ - if (MDOC_HEAD == mdoc->last->type) - return(post_bl_head(mdoc)); - if (MDOC_BLOCK == mdoc->last->type) + nbody = mdoc->last; + switch (nbody->type) { + case (MDOC_BLOCK): return(post_bl_block(mdoc)); - if (MDOC_BODY != mdoc->last->type) + case (MDOC_HEAD): + return(post_bl_head(mdoc)); + case (MDOC_BODY): + break; + default: return(1); + } - for (n = mdoc->last->child; n; n = n->next) { - switch (n->tok) { - case (MDOC_Lp): - /* FALLTHROUGH */ - case (MDOC_Pp): - mdoc_nmsg(mdoc, n, MANDOCERR_CHILD); - /* FALLTHROUGH */ - case (MDOC_It): - /* FALLTHROUGH */ - case (MDOC_Sm): + nchild = nbody->child; + while (NULL != nchild) { + if (MDOC_It == nchild->tok || MDOC_Sm == nchild->tok) { + nchild = nchild->next; continue; - default: - break; } - mdoc_nmsg(mdoc, n, MANDOCERR_SYNTCHILD); - return(0); + mdoc_nmsg(mdoc, nchild, MANDOCERR_CHILD); + + /* + * Move the node out of the Bl block. + * First, collect all required node pointers. + */ + + nblock = nbody->parent; + nprev = nblock->prev; + nparent = nblock->parent; + nnext = nchild->next; + + /* + * Unlink this child. + */ + + assert(NULL == nchild->prev); + if (0 == --nbody->nchild) { + nbody->child = NULL; + nbody->last = NULL; + assert(NULL == nnext); + } else { + nbody->child = nnext; + nnext->prev = NULL; + } + + /* + * Relink this child. + */ + + nchild->parent = nparent; + nchild->prev = nprev; + nchild->next = nblock; + + nblock->prev = nchild; + nparent->nchild++; + if (NULL == nprev) + nparent->child = nchild; + else + nprev->next = nchild; + + nchild = nnext; } return(1); @@ -1574,10 +1674,16 @@ ebool(struct mdoc *mdoc) assert(MDOC_TEXT == mdoc->last->child->type); - if (0 == strcmp(mdoc->last->child->string, "on")) + if (0 == strcmp(mdoc->last->child->string, "on")) { + if (MDOC_Sm == mdoc->last->tok) + mdoc->flags &= ~MDOC_SMOFF; return(1); - if (0 == strcmp(mdoc->last->child->string, "off")) + } + if (0 == strcmp(mdoc->last->child->string, "off")) { + if (MDOC_Sm == mdoc->last->tok) + mdoc->flags |= MDOC_SMOFF; return(1); + } mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_BADBOOL); return(1); @@ -1685,6 +1791,14 @@ post_rs(POST_ARGS) } /* + * Nothing to sort if only invalid nodes were found + * inside the `Rs' body. + */ + + if (NULL == mdoc->last->child) + return(1); + + /* * The full `Rs' block needs special handling to order the * sub-elements according to `rsord'. Pick through each element * and correctly order it. This is a insertion sort. @@ -1749,7 +1863,48 @@ post_rs(POST_ARGS) return(1); } +/* + * For some arguments of some macros, + * convert all breakable hyphens into ASCII_HYPH. + */ static int +post_hyph(POST_ARGS) +{ + struct mdoc_node *n, *nch; + char *cp; + + n = mdoc->last; + switch (n->type) { + case (MDOC_HEAD): + if (MDOC_Sh == n->tok || MDOC_Ss == n->tok) + break; + return(1); + case (MDOC_BODY): + if (MDOC_D1 == n->tok || MDOC_Nd == n->tok) + break; + return(1); + case (MDOC_ELEM): + break; + default: + return(1); + } + + for (nch = n->child; nch; nch = nch->next) { + if (MDOC_TEXT != nch->type) + continue; + cp = nch->string; + if (3 > strnlen(cp, 3)) + continue; + while ('\0' != *(++cp)) + if ('-' == *cp && + isalpha((unsigned char)cp[-1]) && + isalpha((unsigned char)cp[1])) + *cp = ASCII_HYPH; + } + return(1); +} + +static int post_ns(POST_ARGS) { @@ -1809,7 +1964,9 @@ static int post_sh_head(POST_ARGS) { char buf[BUFSIZ]; + struct mdoc_node *n; enum mdoc_sec sec; + int c; /* * Process a new section. Sections are either "named" or @@ -1818,11 +1975,14 @@ post_sh_head(POST_ARGS) * manual sections. */ - if ( ! concat(mdoc, buf, mdoc->last->child, BUFSIZ)) + sec = SEC_CUSTOM; + buf[0] = '\0'; + if (-1 == (c = concat(buf, mdoc->last->child, BUFSIZ))) { + mdoc_nmsg(mdoc, mdoc->last->child, MANDOCERR_MEM); return(0); + } else if (1 == c) + sec = a2sec(buf); - sec = mdoc_str2sec(buf); - /* The NAME should be first. */ if (SEC_NAME != sec && SEC_NONE == mdoc->lastnamed) @@ -1830,15 +1990,32 @@ post_sh_head(POST_ARGS) /* The SYNOPSIS gets special attention in other areas. */ - if (SEC_SYNOPSIS == sec) + if (SEC_SYNOPSIS == sec) { + roff_setreg(mdoc->roff, "nS", 1); mdoc->flags |= MDOC_SYNOPSIS; - else + } else { + roff_setreg(mdoc->roff, "nS", 0); mdoc->flags &= ~MDOC_SYNOPSIS; + } /* Mark our last section. */ mdoc->lastsec = sec; + /* + * Set the section attribute for the current HEAD, for its + * parent BLOCK, and for the HEAD children; the latter can + * only be TEXT nodes, so no recursion is needed. + * For other blocks and elements, including .Sh BODY, this is + * done when allocating the node data structures, but for .Sh + * BLOCK and HEAD, the section is still unknown at that time. + */ + + mdoc->last->parent->sec = sec; + mdoc->last->sec = sec; + for (n = mdoc->last->child; n; n = n->next) + n->sec = sec; + /* We don't care about custom sections after this. */ if (SEC_CUSTOM == sec) @@ -1875,7 +2052,8 @@ post_sh_head(POST_ARGS) break; if (*mdoc->meta.msec == '9') break; - mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_SECMSEC); + mandoc_msg(MANDOCERR_SECMSEC, mdoc->parse, + mdoc->last->line, mdoc->last->pos, buf); break; default: break; @@ -1921,7 +2099,9 @@ pre_par(PRE_ARGS) * block: `Lp', `Pp', or non-compact `Bd' or `Bl'. */ - if (MDOC_Pp != mdoc->last->tok && MDOC_Lp != mdoc->last->tok) + if (MDOC_Pp != mdoc->last->tok && + MDOC_Lp != mdoc->last->tok && + MDOC_br != mdoc->last->tok) return(1); if (MDOC_Bl == n->tok && n->norm->Bl.comp) return(1); @@ -1936,6 +2116,32 @@ pre_par(PRE_ARGS) } static int +post_par(POST_ARGS) +{ + + if (MDOC_ELEM != mdoc->last->type && + MDOC_BLOCK != mdoc->last->type) + return(1); + + if (NULL == mdoc->last->prev) { + if (MDOC_Sh != mdoc->last->parent->tok && + MDOC_Ss != mdoc->last->parent->tok) + return(1); + } else { + if (MDOC_Pp != mdoc->last->prev->tok && + MDOC_Lp != mdoc->last->prev->tok && + (MDOC_br != mdoc->last->tok || + (MDOC_sp != mdoc->last->prev->tok && + MDOC_br != mdoc->last->prev->tok))) + return(1); + } + + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_IGNPAR); + mdoc_node_delete(mdoc, mdoc->last); + return(1); +} + +static int pre_literal(PRE_ARGS) { @@ -1970,22 +2176,27 @@ post_dd(POST_ARGS) { char buf[DATESIZE]; struct mdoc_node *n; + int c; if (mdoc->meta.date) free(mdoc->meta.date); n = mdoc->last; if (NULL == n->child || '\0' == n->child->string[0]) { - mdoc->meta.date = mandoc_normdate(NULL, - mdoc->msg, mdoc->data, n->line, n->pos); + mdoc->meta.date = mandoc_normdate + (mdoc->parse, NULL, n->line, n->pos); return(1); } - if ( ! concat(mdoc, buf, n->child, DATESIZE)) + buf[0] = '\0'; + if (-1 == (c = concat(buf, n->child, DATESIZE))) { + mdoc_nmsg(mdoc, n->child, MANDOCERR_MEM); return(0); + } - mdoc->meta.date = mandoc_normdate(buf, - mdoc->msg, mdoc->data, n->line, n->pos); + assert(c); + mdoc->meta.date = mandoc_normdate + (mdoc->parse, buf, n->line, n->pos); return(1); } @@ -2012,7 +2223,7 @@ post_dt(POST_ARGS) if (NULL != (nn = n->child)) for (p = nn->string; *p; p++) { - if (toupper((u_char)*p) == *p) + if (toupper((unsigned char)*p) == *p) continue; /* @@ -2058,7 +2269,7 @@ post_dt(POST_ARGS) * arch = NULL */ - cp = mdoc_a2msec(nn->string); + cp = mandoc_a2msec(nn->string); if (cp) { mdoc->meta.vol = mandoc_strdup(cp); mdoc->meta.msec = mandoc_strdup(nn->string); @@ -2083,9 +2294,9 @@ post_dt(POST_ARGS) free(mdoc->meta.vol); mdoc->meta.vol = mandoc_strdup(cp); } else { - /* FIXME: warn about bad arch. */ cp = mdoc_a2arch(nn->string); if (NULL == cp) { + mdoc_nmsg(mdoc, nn, MANDOCERR_BADVOLARCH); free(mdoc->meta.vol); mdoc->meta.vol = mandoc_strdup(nn->string); } else @@ -2127,7 +2338,8 @@ post_bx(POST_ARGS) n = mdoc->last->child; if (n && NULL != (n = n->next)) - *n->string = toupper((unsigned char)*n->string); + *n->string = (char)toupper + ((unsigned char)*n->string); return(1); } @@ -2137,6 +2349,7 @@ post_os(POST_ARGS) { struct mdoc_node *n; char buf[BUFSIZ]; + int c; #ifndef OSNAME struct utsname utsname; #endif @@ -2144,30 +2357,36 @@ post_os(POST_ARGS) n = mdoc->last; /* - * Set the operating system by way of the `Os' macro. Note that - * if an argument isn't provided and -DOSNAME="\"foo\"" is - * provided during compilation, this value will be used instead - * of filling in "sysname release" from uname(). + * Set the operating system by way of the `Os' macro. + * The order of precedence is: + * 1. the argument of the `Os' macro, unless empty + * 2. the -Ios=foo command line argument, if provided + * 3. -DOSNAME="\"foo\"", if provided during compilation + * 4. "sysname release" from uname(3) */ - if (mdoc->meta.os) - free(mdoc->meta.os); + free(mdoc->meta.os); - if ( ! concat(mdoc, buf, n->child, BUFSIZ)) + buf[0] = '\0'; + if (-1 == (c = concat(buf, n->child, BUFSIZ))) { + mdoc_nmsg(mdoc, n->child, MANDOCERR_MEM); return(0); + } - /* XXX: yes, these can all be dynamically-adjusted buffers, but - * it's really not worth the extra hackery. - */ + assert(c); if ('\0' == buf[0]) { + if (mdoc->defos) { + mdoc->meta.os = mandoc_strdup(mdoc->defos); + return(1); + } #ifdef OSNAME if (strlcat(buf, OSNAME, BUFSIZ) >= BUFSIZ) { mdoc_nmsg(mdoc, n, MANDOCERR_MEM); return(0); } #else /*!OSNAME */ - if (uname(&utsname)) { + if (-1 == uname(&utsname)) { mdoc_nmsg(mdoc, n, MANDOCERR_UNAME); mdoc->meta.os = mandoc_strdup("UNKNOWN"); return(post_prol(mdoc)); @@ -2221,36 +2440,130 @@ post_std(POST_ARGS) return(1); } +/* + * Concatenate a node, stopping at the first non-text. + * Concatenation is separated by a single whitespace. + * Returns -1 on fatal (string overrun) error, 0 if child nodes were + * encountered, 1 otherwise. + */ static int -concat(struct mdoc *m, char *p, const struct mdoc_node *n, size_t sz) +concat(char *p, const struct mdoc_node *n, size_t sz) { - p[0] = '\0'; + for ( ; NULL != n; n = n->next) { + if (MDOC_TEXT != n->type) + return(0); + if ('\0' != p[0] && strlcat(p, " ", sz) >= sz) + return(-1); + if (strlcat(p, n->string, sz) >= sz) + return(-1); + concat(p, n->child, sz); + } - /* - * Concatenate sibling nodes together. All siblings must be of - * type MDOC_TEXT or an assertion is raised. Concatenation is - * separated by a single whitespace. Returns 0 on fatal (string - * overrun) error. - */ + return(1); +} - for ( ; n; n = n->next) { - assert(MDOC_TEXT == n->type); +static enum mdoc_sec +a2sec(const char *p) +{ + int i; - if (strlcat(p, n->string, sz) >= sz) { - mdoc_nmsg(m, n, MANDOCERR_MEM); - return(0); - } + for (i = 0; i < (int)SEC__MAX; i++) + if (secnames[i] && 0 == strcmp(p, secnames[i])) + return((enum mdoc_sec)i); - if (NULL == n->next) - continue; + return(SEC_CUSTOM); +} - if (strlcat(p, " ", sz) >= sz) { - mdoc_nmsg(m, n, MANDOCERR_MEM); - return(0); - } - } +static size_t +macro2len(enum mdoct macro) +{ - return(1); + switch (macro) { + case(MDOC_Ad): + return(12); + case(MDOC_Ao): + return(12); + case(MDOC_An): + return(12); + case(MDOC_Aq): + return(12); + case(MDOC_Ar): + return(12); + case(MDOC_Bo): + return(12); + case(MDOC_Bq): + return(12); + case(MDOC_Cd): + return(12); + case(MDOC_Cm): + return(10); + case(MDOC_Do): + return(10); + case(MDOC_Dq): + return(12); + case(MDOC_Dv): + return(12); + case(MDOC_Eo): + return(12); + case(MDOC_Em): + return(10); + case(MDOC_Er): + return(17); + case(MDOC_Ev): + return(15); + case(MDOC_Fa): + return(12); + case(MDOC_Fl): + return(10); + case(MDOC_Fo): + return(16); + case(MDOC_Fn): + return(16); + case(MDOC_Ic): + return(10); + case(MDOC_Li): + return(16); + case(MDOC_Ms): + return(6); + case(MDOC_Nm): + return(10); + case(MDOC_No): + return(12); + case(MDOC_Oo): + return(10); + case(MDOC_Op): + return(14); + case(MDOC_Pa): + return(32); + case(MDOC_Pf): + return(12); + case(MDOC_Po): + return(12); + case(MDOC_Pq): + return(12); + case(MDOC_Ql): + return(16); + case(MDOC_Qo): + return(12); + case(MDOC_So): + return(12); + case(MDOC_Sq): + return(12); + case(MDOC_Sy): + return(6); + case(MDOC_Sx): + return(16); + case(MDOC_Tn): + return(10); + case(MDOC_Va): + return(12); + case(MDOC_Vt): + return(12); + case(MDOC_Xr): + return(10); + default: + break; + }; + return(0); } -