=================================================================== RCS file: /cvs/mandoc/tbl_layout.c,v retrieving revision 1.28 retrieving revision 1.32 diff -u -p -r1.28 -r1.32 --- mandoc/tbl_layout.c 2014/10/07 14:07:03 1.28 +++ mandoc/tbl_layout.c 2015/01/26 18:42:30 1.32 @@ -1,7 +1,7 @@ -/* $Id: tbl_layout.c,v 1.28 2014/10/07 14:07:03 schwarze Exp $ */ +/* $Id: tbl_layout.c,v 1.32 2015/01/26 18:42:30 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons - * Copyright (c) 2012, 2014 Ingo Schwarze + * Copyright (c) 2012, 2014, 2015 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -34,15 +34,7 @@ struct tbl_phrase { enum tbl_cellt key; }; -/* - * FIXME: we can make this parse a lot nicer by, when an error is - * encountered in a layout key, bailing to the next key (i.e. to the - * next whitespace then continuing). - */ - -#define KEYS_MAX 11 - -static const struct tbl_phrase keys[KEYS_MAX] = { +static const struct tbl_phrase keys[] = { { 'c', TBL_CELL_CENTRE }, { 'r', TBL_CELL_RIGHT }, { 'l', TBL_CELL_LEFT }, @@ -55,58 +47,30 @@ static const struct tbl_phrase keys[KEYS_MAX] = { { '=', TBL_CELL_DHORIZ } }; -static int mods(struct tbl_node *, struct tbl_cell *, +#define KEYS_MAX ((int)(sizeof(keys)/sizeof(keys[0]))) + +static void mods(struct tbl_node *, struct tbl_cell *, int, const char *, int *); -static int cell(struct tbl_node *, struct tbl_row *, +static void cell(struct tbl_node *, struct tbl_row *, int, const char *, int *); -static void row(struct tbl_node *, int, const char *, int *); static struct tbl_cell *cell_alloc(struct tbl_node *, struct tbl_row *, enum tbl_cellt, int vert); -static int +static void mods(struct tbl_node *tbl, struct tbl_cell *cp, int ln, const char *p, int *pos) { - char buf[5]; - int i; + char *endptr; - /* Not all types accept modifiers. */ +mod: + while (p[*pos] == ' ' || p[*pos] == '\t') + (*pos)++; - switch (cp->pos) { - case TBL_CELL_DOWN: - /* FALLTHROUGH */ - case TBL_CELL_HORIZ: - /* FALLTHROUGH */ - case TBL_CELL_DHORIZ: - return(1); - default: - break; - } + /* Row delimiters and cell specifiers end modifier lists. */ -mod: - /* - * XXX: since, at least for now, modifiers are non-conflicting - * (are separable by value, regardless of position), we let - * modifiers come in any order. The existing tbl doesn't let - * this happen. - */ - switch (p[*pos]) { - case '\0': - /* FALLTHROUGH */ - case ' ': - /* FALLTHROUGH */ - case '\t': - /* FALLTHROUGH */ - case ',': - /* FALLTHROUGH */ - case '.': - /* FALLTHROUGH */ - case '|': - return(1); - default: - break; - } + if (strchr(".,-=^_ACLNRSaclnrs|", p[*pos]) != NULL) + return; /* Throw away parenthesised expression. */ @@ -118,69 +82,65 @@ mod: (*pos)++; goto mod; } - mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, + mandoc_msg(MANDOCERR_TBLLAYOUT_PAR, tbl->parse, ln, *pos, NULL); - return(0); + return; } /* Parse numerical spacing from modifier string. */ if (isdigit((unsigned char)p[*pos])) { - for (i = 0; i < 4; i++) { - if ( ! isdigit((unsigned char)p[*pos + i])) - break; - buf[i] = p[*pos + i]; - } - buf[i] = '\0'; - - /* No greater than 4 digits. */ - - if (4 == i) { - mandoc_msg(MANDOCERR_TBLLAYOUT, - tbl->parse, ln, *pos, NULL); - return(0); - } - - *pos += i; - cp->spacing = (size_t)atoi(buf); - + cp->spacing = strtoull(p + *pos, &endptr, 10); + *pos = endptr - p; goto mod; - /* NOTREACHED */ } - /* TODO: GNU has many more extensions. */ - switch (tolower((unsigned char)p[(*pos)++])) { - case 'z': - cp->flags |= TBL_CELL_WIGN; + case 'b': + /* FALLTHROUGH */ + case 'i': + /* FALLTHROUGH */ + case 'r': + (*pos)--; + break; + case 'd': + cp->flags |= TBL_CELL_BALIGN; goto mod; - case 'u': - cp->flags |= TBL_CELL_UP; - goto mod; case 'e': cp->flags |= TBL_CELL_EQUAL; goto mod; + case 'f': + break; + case 'm': + mandoc_msg(MANDOCERR_TBLLAYOUT_MOD, tbl->parse, + ln, *pos, "m"); + goto mod; + case 'p': + /* FALLTHROUGH */ + case 'v': + if (p[*pos] == '-' || p[*pos] == '+') + (*pos)++; + while (isdigit((unsigned char)p[*pos])) + (*pos)++; + goto mod; case 't': cp->flags |= TBL_CELL_TALIGN; goto mod; - case 'd': - cp->flags |= TBL_CELL_BALIGN; + case 'u': + cp->flags |= TBL_CELL_UP; goto mod; case 'w': /* XXX for now, ignore minimal column width */ goto mod; - case 'f': - break; - case 'r': - /* FALLTHROUGH */ - case 'b': - /* FALLTHROUGH */ - case 'i': - (*pos)--; - break; + case 'x': + cp->flags |= TBL_CELL_WMAX; + goto mod; + case 'z': + cp->flags |= TBL_CELL_WIGN; + goto mod; default: - mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, - ln, *pos - 1, NULL); - return(0); + mandoc_vmsg(MANDOCERR_TBLLAYOUT_CHAR, tbl->parse, + ln, *pos - 1, "%c", p[*pos - 1]); + goto mod; } switch (tolower((unsigned char)p[(*pos)++])) { @@ -199,20 +159,13 @@ mod: case 'r': goto mod; default: - break; - } - if (isalnum((unsigned char)p[*pos - 1])) { mandoc_vmsg(MANDOCERR_FT_BAD, tbl->parse, ln, *pos - 1, "TS f%c", p[*pos - 1]); goto mod; } - - mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, - ln, *pos - 1, NULL); - return(0); } -static int +static void cell(struct tbl_node *tbl, struct tbl_row *rp, int ln, const char *p, int *pos) { @@ -221,16 +174,24 @@ cell(struct tbl_node *tbl, struct tbl_row *rp, /* Handle vertical lines. */ - for (vert = 0; '|' == p[*pos]; ++*pos) - vert++; - while (' ' == p[*pos]) + vert = 0; +again: + while (p[*pos] == ' ' || p[*pos] == '\t' || p[*pos] == '|') { + if (p[*pos] == '|') { + if (vert < 2) + vert++; + else + mandoc_msg(MANDOCERR_TBLLAYOUT_VERT, + tbl->parse, ln, *pos, NULL); + } (*pos)++; + } /* Handle trailing vertical lines */ if ('.' == p[*pos] || '\0' == p[*pos]) { rp->vert = vert; - return(1); + return; } /* Parse the column position (`c', `l', `r', ...). */ @@ -239,126 +200,81 @@ cell(struct tbl_node *tbl, struct tbl_row *rp, if (tolower((unsigned char)p[*pos]) == keys[i].name) break; - if (KEYS_MAX == i) { - mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, - ln, *pos, NULL); - return(0); + if (i == KEYS_MAX) { + mandoc_vmsg(MANDOCERR_TBLLAYOUT_CHAR, tbl->parse, + ln, *pos, "%c", p[*pos]); + (*pos)++; + goto again; } - c = keys[i].key; - /* - * If a span cell is found first, raise a warning and abort the - * parse. If a span cell is found and the last layout element - * isn't a "normal" layout, bail. - * - * FIXME: recover from this somehow? - */ + /* Special cases of spanners. */ - if (TBL_CELL_SPAN == c) { - if (NULL == rp->first) { - mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, - ln, *pos, NULL); - return(0); - } else if (rp->last) - switch (rp->last->pos) { - case TBL_CELL_HORIZ: - /* FALLTHROUGH */ - case TBL_CELL_DHORIZ: - mandoc_msg(MANDOCERR_TBLLAYOUT, - tbl->parse, ln, *pos, NULL); - return(0); - default: - break; - } - } + if (c == TBL_CELL_SPAN) { + if (rp->last == NULL) + mandoc_msg(MANDOCERR_TBLLAYOUT_SPAN, + tbl->parse, ln, *pos, NULL); + else if (rp->last->pos == TBL_CELL_HORIZ || + rp->last->pos == TBL_CELL_DHORIZ) + c = rp->last->pos; + } else if (c == TBL_CELL_DOWN && rp == tbl->first_row) + mandoc_msg(MANDOCERR_TBLLAYOUT_DOWN, + tbl->parse, ln, *pos, NULL); - /* - * If a vertical spanner is found, we may not be in the first - * row. - */ - - if (TBL_CELL_DOWN == c && rp == tbl->first_row) { - mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos, NULL); - return(0); - } - (*pos)++; - /* Disallow adjacent spacers. */ - - if (vert > 2) { - mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos - 1, NULL); - return(0); - } - /* Allocate cell then parse its modifiers. */ - return(mods(tbl, cell_alloc(tbl, rp, c, vert), ln, p, pos)); + mods(tbl, cell_alloc(tbl, rp, c, vert), ln, p, pos); } -static void -row(struct tbl_node *tbl, int ln, const char *p, int *pos) +void +tbl_layout(struct tbl_node *tbl, int ln, const char *p) { struct tbl_row *rp; + int pos; -row: /* - * EBNF describing this section: - * - * row ::= row_list [:space:]* [.]?[\n] - * row_list ::= [:space:]* row_elem row_tail - * row_tail ::= [:space:]*[,] row_list | - * epsilon - * row_elem ::= [\t\ ]*[:alpha:]+ - */ + pos = 0; + rp = NULL; - rp = mandoc_calloc(1, sizeof(struct tbl_row)); - if (tbl->last_row) - tbl->last_row->next = rp; - else - tbl->first_row = rp; - tbl->last_row = rp; + for (;;) { + /* Skip whitespace before and after each cell. */ -cell: - while (isspace((unsigned char)p[*pos])) - (*pos)++; + while (p[pos] == ' ' || p[pos] == '\t') + pos++; - /* Safely exit layout context. */ + switch (p[pos]) { + case ',': /* Next row on this input line. */ + pos++; + rp = NULL; + continue; + case '\0': /* Next row on next input line. */ + return; + case '.': /* End of layout. */ + pos++; + tbl->part = TBL_PART_DATA; + if (tbl->first_row != NULL) + return; + mandoc_msg(MANDOCERR_TBLLAYOUT_NONE, + tbl->parse, ln, pos, NULL); + rp = mandoc_calloc(1, sizeof(*rp)); + cell_alloc(tbl, rp, TBL_CELL_LEFT, 0); + tbl->first_row = tbl->last_row = rp; + return; + default: /* Cell. */ + break; + } - if ('.' == p[*pos]) { - tbl->part = TBL_PART_DATA; - if (NULL == tbl->first_row) - mandoc_msg(MANDOCERR_TBLNOLAYOUT, - tbl->parse, ln, *pos, NULL); - (*pos)++; - return; + if (rp == NULL) { /* First cell on this line. */ + rp = mandoc_calloc(1, sizeof(*rp)); + if (tbl->last_row) + tbl->last_row->next = rp; + else + tbl->first_row = rp; + tbl->last_row = rp; + } + cell(tbl, rp, ln, p, &pos); } - - /* End (and possibly restart) a row. */ - - if (',' == p[*pos]) { - (*pos)++; - goto row; - } else if ('\0' == p[*pos]) - return; - - if ( ! cell(tbl, rp, ln, p, pos)) - return; - - goto cell; - /* NOTREACHED */ -} - -int -tbl_layout(struct tbl_node *tbl, int ln, const char *p) -{ - int pos; - - pos = 0; - row(tbl, ln, p, &pos); - - /* Always succeed. */ - return(1); } static struct tbl_cell *