=================================================================== RCS file: /cvs/mandoc/mdoc.c,v retrieving revision 1.136 retrieving revision 1.147 diff -u -p -r1.136 -r1.147 --- mandoc/mdoc.c 2010/05/17 22:11:42 1.136 +++ mandoc/mdoc.c 2010/06/26 15:36:37 1.147 @@ -1,6 +1,6 @@ -/* $Id: mdoc.c,v 1.136 2010/05/17 22:11:42 kristaps Exp $ */ +/* $Id: mdoc.c,v 1.147 2010/06/26 15:36:37 kristaps Exp $ */ /* - * Copyright (c) 2008, 2009 Kristaps Dzonsons + * Copyright (c) 2008, 2009 Kristaps Dzonsons * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -29,6 +29,7 @@ #include #include "mandoc.h" +#include "regs.h" #include "libmdoc.h" #include "libmandoc.h" @@ -69,7 +70,7 @@ const char *const __mdoc_macronames[MDOC_MAX] = { /* LINTED */ "Dx", "%Q", "br", "sp", /* LINTED */ - "%U" + "%U", "Ta" }; const char *const __mdoc_argnames[MDOC_ARG_MAX] = { @@ -229,7 +230,8 @@ mdoc_endparse(struct mdoc *m) * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). */ int -mdoc_parseln(struct mdoc *m, int ln, char *buf, int offs) +mdoc_parseln(struct mdoc *m, const struct regset *regs, + int ln, char *buf, int offs) { if (MDOC_HALT & m->flags) @@ -276,11 +278,11 @@ mdoc_macro(struct mdoc *m, enum mdoct tok, if ( ! mdoc_pmsg(m, ln, pp, MANDOCERR_BADPROLOG)) return(0); if (NULL == m->meta.title) - m->meta.title = mandoc_strdup("unknown"); + m->meta.title = mandoc_strdup("UNKNOWN"); if (NULL == m->meta.vol) - m->meta.vol = mandoc_strdup("local"); + m->meta.vol = mandoc_strdup("LOCAL"); if (NULL == m->meta.os) - m->meta.os = mandoc_strdup("local"); + m->meta.os = mandoc_strdup("LOCAL"); if (0 == m->meta.date) m->meta.date = time(NULL); m->flags |= MDOC_PBODY; @@ -542,7 +544,8 @@ mdoc_node_delete(struct mdoc *m, struct mdoc_node *p) static int mdoc_ptext(struct mdoc *m, int line, char *buf, int offs) { - int i; + char *c, *ws, *end; + struct mdoc_node *n; /* Ignore bogus comments. */ @@ -556,20 +559,86 @@ mdoc_ptext(struct mdoc *m, int line, char *buf, int of if (SEC_NONE == m->lastnamed) return(mdoc_pmsg(m, line, offs, MANDOCERR_NOTEXT)); - /* Literal just gets pulled in as-is. */ - - if (MDOC_LITERAL & m->flags) - return(mdoc_word_alloc(m, line, offs, buf + offs)); + assert(m->last); + n = m->last; - /* Check for a blank line, which may also consist of spaces. */ + /* + * Divert directly to list processing if we're encountering a + * columnar MDOC_BLOCK with or without a prior MDOC_BLOCK entry + * (a MDOC_BODY means it's already open, in which case we should + * process within its context in the normal way). + */ - for (i = offs; ' ' == buf[i]; i++) - /* Skip to first non-space. */ ; + if (MDOC_Bl == n->tok && MDOC_BODY == n->type && + LIST_column == n->data.Bl.type) { + /* `Bl' is open without any children. */ + m->flags |= MDOC_FREECOL; + return(mdoc_macro(m, MDOC_It, line, offs, &offs, buf)); + } - if ('\0' == buf[i]) { - if ( ! mdoc_pmsg(m, line, offs, MANDOCERR_NOBLANKLN)) + if (MDOC_It == n->tok && MDOC_BLOCK == n->type && + NULL != n->parent && + MDOC_Bl == n->parent->tok && + LIST_column == n->parent->data.Bl.type) { + /* `Bl' has block-level `It' children. */ + m->flags |= MDOC_FREECOL; + return(mdoc_macro(m, MDOC_It, line, offs, &offs, buf)); + } + + /* + * Search for the beginning of unescaped trailing whitespace (ws) + * and for the first character not to be output (end). + */ + + /* FIXME: replace with strcspn(). */ + ws = NULL; + for (c = end = buf + offs; *c; c++) { + switch (*c) { + case '-': + if (mandoc_hyph(buf + offs, c)) + *c = ASCII_HYPH; + ws = NULL; + break; + case ' ': + if (NULL == ws) + ws = c; + continue; + case '\t': + /* + * Always warn about trailing tabs, + * even outside literal context, + * where they should be put on the next line. + */ + if (NULL == ws) + ws = c; + /* + * Strip trailing tabs in literal context only; + * outside, they affect the next line. + */ + if (MDOC_LITERAL & m->flags) + continue; + break; + case '\\': + /* Skip the escaped character, too, if any. */ + if (c[1]) + c++; + /* FALLTHROUGH */ + default: + ws = NULL; + break; + } + end = c + 1; + } + *end = '\0'; + + if (ws) + if ( ! mdoc_pmsg(m, line, (int)(ws-buf), MANDOCERR_EOLNSPACE)) return(0); + if ('\0' == buf[offs] && ! (MDOC_LITERAL & m->flags)) { + if ( ! mdoc_pmsg(m, line, (int)(c-buf), MANDOCERR_NOBLANKLN)) + return(0); + /* * Insert a `Pp' in the case of a blank line. Technically, * blank lines aren't allowed, but enough manuals assume this @@ -582,41 +651,21 @@ mdoc_ptext(struct mdoc *m, int line, char *buf, int of return(1); } - /* - * Warn if the last un-escaped character is whitespace. Then - * strip away the remaining spaces (tabs stay!). - */ - - i = (int)strlen(buf); - assert(i); - - if (' ' == buf[i - 1] || '\t' == buf[i - 1]) { - if (i > 1 && '\\' != buf[i - 2]) - if ( ! mdoc_pmsg(m, line, i - 1, MANDOCERR_EOLNSPACE)) - return(0); - - for (--i; i && ' ' == buf[i]; i--) - /* Spin back to non-space. */ ; - - /* Jump ahead of escaped whitespace. */ - i += '\\' == buf[i] ? 2 : 1; - - buf[i] = '\0'; - } - - /* Allocate the whole word. */ - - if ( ! mdoc_word_alloc(m, line, offs, buf + offs)) + if ( ! mdoc_word_alloc(m, line, offs, buf+offs)) return(0); + if (MDOC_LITERAL & m->flags) + return(1); + /* * End-of-sentence check. If the last character is an unescaped * EOS character, then flag the node as being the end of a * sentence. The front-end will know how to interpret this. */ - assert(i); - if (mandoc_eos(buf, (size_t)i)) + assert(buf < end); + + if (mandoc_eos(buf+offs, (size_t)(end-buf-offs))) m->last->flags |= MDOC_EOS; return(1); @@ -633,6 +682,7 @@ macrowarn(struct mdoc *m, int ln, const char *buf, int buf, strlen(buf) > 3 ? "..." : ""); /* FIXME: logic should be in driver. */ + /* FIXME: broken, will error out and not omit a message. */ return(MDOC_IGN_MACRO & m->pflags ? rc : 0); } @@ -644,9 +694,10 @@ macrowarn(struct mdoc *m, int ln, const char *buf, int int mdoc_pmacro(struct mdoc *m, int ln, char *buf, int offs) { - enum mdoct tok; - int i, j, sv; - char mac[5]; + enum mdoct tok; + int i, j, sv; + char mac[5]; + struct mdoc_node *n; /* Empty lines are ignored. */ @@ -714,10 +765,51 @@ mdoc_pmacro(struct mdoc *m, int ln, char *buf, int off if ( ! mdoc_pmsg(m, ln, i - 1, MANDOCERR_EOLNSPACE)) goto err; - /* - * Begin recursive parse sequence. Since we're at the start of - * the line, we don't need to do callable/parseable checks. + /* + * If an initial macro or a list invocation, divert directly + * into macro processing. */ + + if (NULL == m->last || MDOC_It == tok || MDOC_El == tok) { + if ( ! mdoc_macro(m, tok, ln, sv, &i, buf)) + goto err; + return(1); + } + + n = m->last; + assert(m->last); + + /* + * If the first macro of a `Bl -column', open an `It' block + * context around the parsed macro. + */ + + if (MDOC_Bl == n->tok && MDOC_BODY == n->type && + LIST_column == n->data.Bl.type) { + m->flags |= MDOC_FREECOL; + if ( ! mdoc_macro(m, MDOC_It, ln, sv, &sv, buf)) + goto err; + return(1); + } + + /* + * If we're following a block-level `It' within a `Bl -column' + * context (perhaps opened in the above block or in ptext()), + * then open an `It' block context around the parsed macro. + */ + + if (MDOC_It == n->tok && MDOC_BLOCK == n->type && + NULL != n->parent && + MDOC_Bl == n->parent->tok && + LIST_column == n->parent->data.Bl.type) { + m->flags |= MDOC_FREECOL; + if ( ! mdoc_macro(m, MDOC_It, ln, sv, &sv, buf)) + goto err; + return(1); + } + + /* Normal processing of a macro. */ + if ( ! mdoc_macro(m, tok, ln, sv, &i, buf)) goto err;