mandoc/mdoc.c - diff

Return to mdoc.c CVS log

Up to [cvsweb.bsd.lv] / mandoc

Diff for /mandoc/mdoc.c between versions 1.50 and 1.253

-version 1.50, 2009/03/02 17:14:46
+version 1.253, 2015/10/06 18:32:19
 Line 1
 Line 1
 Line 1
- /* $Id$ */
+ /*      $Id$ */
  /*
-  * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
+  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+  * Copyright (c) 2010, 2012-2015 Ingo Schwarze <schwarze@openbsd.org>
   *
   * Permission to use, copy, modify, and distribute this software for any
-  * purpose with or without fee is hereby granted, provided that the
+  * purpose with or without fee is hereby granted, provided that the above
-  * above copyright notice and this permission notice appear in all
+  * copyright notice and this permission notice appear in all copies.
-  * copies.
   *
-  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
+  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
-  * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
+  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-  * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
+  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
-  * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-  * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-  * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-  * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-  * PERFORMANCE OF THIS SOFTWARE.
   */
+ #include "config.h"
+ #include <sys/types.h>
  #include <assert.h>
  #include <ctype.h>
- #include <err.h>
  #include <stdarg.h>
- #include <stdlib.h>
  #include <stdio.h>
+ #include <stdlib.h>
  #include <string.h>
+ #include <time.h>
- #include "private.h"
+ #include "mandoc_aux.h"
+ #include "mandoc.h"
+ #include "roff.h"
+ #include "mdoc.h"
+ #include "libmandoc.h"
+ #include "roff_int.h"
+ #include "libmdoc.h"
- /*
+ const   char *const __mdoc_macronames[MDOC_MAX + 1] = {
-  * Main caller in the libmdoc library.  This begins the parsing routine,
+         "text",
-  * handles allocation of data, and so forth.  Most of the "work" is done
+         "Ap",           "Dd",           "Dt",           "Os",
-  * in macro.c and validate.c.
-  */
- static  struct mdoc_arg  *argdup(size_t, const struct mdoc_arg *);
- static  void              argfree(size_t, struct mdoc_arg *);
- static  void              argcpy(struct mdoc_arg *,
-                                 const struct mdoc_arg *);
- static  struct mdoc_node *mdoc_node_alloc(const struct mdoc *);
- static  int               mdoc_node_append(struct mdoc *,
-                                 struct mdoc_node *);
- static  void              mdoc_elem_free(struct mdoc_elem *);
- static  void              mdoc_text_free(struct mdoc_text *);
- const   char *const __mdoc_macronames[MDOC_MAX] = {
-         "\\\"",         "Dd",           "Dt",           "Os",
          "Sh",           "Ss",           "Pp",           "D1",
          "Dl",           "Bd",           "Ed",           "Bl",
          "El",           "It",           "Ad",           "An",
-Line 55  const char *const __mdoc_macronames[MDOC_MAX] = {
+Line 47  const char *const __mdoc_macronames[MDOC_MAX] = {
 Line 55  const char *const __mdoc_macronames[MDOC_MAX] = {
 Line 47  const char *const __mdoc_macronames[MDOC_MAX] = {
          "Ic",           "In",           "Li",           "Nd",
          "Nm",           "Op",           "Ot",           "Pa",
          "Rv",           "St",           "Va",           "Vt",
-         /* LINTED */
+         "Xr",           "%A",           "%B",           "%D",
-         "Xr",           "\%A",          "\%B",          "\%D",
+         "%I",           "%J",           "%N",           "%O",
-         /* LINTED */
+         "%P",           "%R",           "%T",           "%V",
-         "\%I",          "\%J",          "\%N",          "\%O",
-         /* LINTED */
-         "\%P",          "\%R",          "\%T",          "\%V",
          "Ac",           "Ao",           "Aq",           "At",
          "Bc",           "Bf",           "Bo",           "Bq",
          "Bsx",          "Bx",           "Db",           "Dc",
-Line 74  const char *const __mdoc_macronames[MDOC_MAX] = {
+Line 63  const char *const __mdoc_macronames[MDOC_MAX] = {
 Line 74  const char *const __mdoc_macronames[MDOC_MAX] = {
 Line 63  const char *const __mdoc_macronames[MDOC_MAX] = {
          "Tn",           "Ux",           "Xc",           "Xo",
          "Fo",           "Fc",           "Oo",           "Oc",
          "Bk",           "Ek",           "Bt",           "Hf",
-         "Fr",           "Ud",
+         "Fr",           "Ud",           "Lb",           "Lp",
-         };
+         "Lk",           "Mt",           "Brq",          "Bro",
+         "Brc",          "%C",           "Es",           "En",
+         "Dx",           "%Q",           "br",           "sp",
+         "%U",           "Ta",           "ll",
+ };
  const   char *const __mdoc_argnames[MDOC_ARG_MAX] = {
          "split",                "nosplit",              "ragged",
          "unfilled",             "literal",              "file",
          "offset",               "bullet",               "dash",
          "hyphen",               "item",                 "enum",
          "tag",                  "diag",                 "hang",
          "ohang",                "inset",                "column",
          "width",                "compact",              "std",
-         "p1003.1-88",           "p1003.1-90",           "p1003.1-96",
+         "filled",               "words",                "emphasis",
-         "p1003.1-2001",         "p1003.1-2004",         "p1003.1",
+         "symbolic",             "nested",               "centered"
-         "p1003.1b",             "p1003.1b-93",          "p1003.1c-95",
-         "p1003.1g-2000",        "p1003.2-92",           "p1387.2-95",
-         "p1003.2",              "p1387.2",              "isoC-90",
-         "isoC-amd1",            "isoC-tcor1",           "isoC-tcor2",
-         "isoC-99",              "ansiC",                "ansiC-89",
-         "ansiC-99",             "ieee754",              "iso8802-3",
-         "xpg3",                 "xpg4",                 "xpg4.2",
-         "xpg4.3",               "xbd5",                 "xcu5",
-         "xsh5",                 "xns5",                 "xns5.2d2.0",
-         "xcurses4.2",           "susv2",                "susv3",
-         "svid4",                "filled",               "words",
-         "emphasis",             "symbolic",
          };
- const   char * const *mdoc_macronames = __mdoc_macronames;
+ const   char * const *mdoc_macronames = __mdoc_macronames + 1;
  const   char * const *mdoc_argnames = __mdoc_argnames;
+ static  int               mdoc_ptext(struct roff_man *, int, char *, int);
+ static  int               mdoc_pmacro(struct roff_man *, int, char *, int);
- const struct mdoc_node *
- mdoc_node(const struct mdoc *mdoc)
- {
-         return(mdoc->first);
- }
- const struct mdoc_meta *
- mdoc_meta(const struct mdoc *mdoc)
- {
-         return(&mdoc->meta);
- }
- void
- mdoc_free(struct mdoc *mdoc)
- {
-         if (mdoc->first)
-                 mdoc_node_freelist(mdoc->first);
-         if (mdoc->htab)
-                 mdoc_tokhash_free(mdoc->htab);
-         if (mdoc->meta.title)
-                 free(mdoc->meta.title);
-         if (mdoc->meta.os)
-                 free(mdoc->meta.os);
-         if (mdoc->meta.name)
-                 free(mdoc->meta.name);
-         free(mdoc);
- }
- struct mdoc *
- mdoc_alloc(void *data, const struct mdoc_cb *cb)
- {
-         struct mdoc     *p;
-         p = xcalloc(1, sizeof(struct mdoc));
-         p->data = data;
-         if (cb)
-                 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
-         p->last = xcalloc(1, sizeof(struct mdoc_node));
-         p->last->type = MDOC_ROOT;
-         p->first = p->last;
-         p->next = MDOC_NEXT_CHILD;
-         p->htab = mdoc_tokhash_alloc();
-         return(p);
- }
- int
- mdoc_endparse(struct mdoc *mdoc)
- {
-         if (MDOC_HALT & mdoc->flags)
-                 return(0);
-         if (NULL == mdoc->first)
-                 return(1);
-         assert(mdoc->last);
-         if ( ! macro_end(mdoc)) {
-                 mdoc->flags |= MDOC_HALT;
-                 return(0);
-         }
-         return(1);
- }
  /*
-  * Main line-parsing routine.  If the line is a macro-line (started with
+  * Main parse routine.  Parses a single line -- really just hands off to
-  * a '.' control character), then pass along to the parser, which parses
+  * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
-  * subsequent macros until the end of line.  If normal text, simply
-  * append the entire line to the chain.
   */
  int
- mdoc_parseln(struct mdoc *mdoc, int line, char *buf)
+ mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs)
  {
-         int               c, i;
-         char              tmp[5];
-         if (MDOC_HALT & mdoc->flags)
+         if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line)
-                 return(0);
+                 mdoc->flags |= MDOC_NEWLINE;
-         mdoc->linetok = 0;
-         if ('.' != *buf) {
-                 /*
-                  * Free-form text.  Not allowed in the prologue.
-                  */
-                 if (SEC_PROLOGUE == mdoc->lastnamed)
-                         return(mdoc_perr(mdoc, line, 0,
-                                         "no text in prologue"));
-                 if ( ! mdoc_word_alloc(mdoc, line, 0, buf))
-                         return(0);
-                 mdoc->next = MDOC_NEXT_SIBLING;
-                 return(1);
-         }
          /*
-          * Control-character detected.  Begin the parsing sequence.
+          * Let the roff nS register switch SYNOPSIS mode early,
+          * such that the parser knows at all times
+          * whether this mode is on or off.
+          * Note that this mode is also switched by the Sh macro.
           */
+         if (roff_getreg(mdoc->roff, "nS"))
+                 mdoc->flags |= MDOC_SYNOPSIS;
+         else
+                 mdoc->flags &= ~MDOC_SYNOPSIS;
-         if (buf[1] && '\\' == buf[1])
+         return roff_getcontrol(mdoc->roff, buf, &offs) ?
-                 if (buf[2] && '\"' == buf[2])
+             mdoc_pmacro(mdoc, ln, buf, offs) :
-                         return(1);
+             mdoc_ptext(mdoc, ln, buf, offs);
-         i = 1;
-         while (buf[i] && ! isspace((int)buf[i]) && i < (int)sizeof(tmp))
-                 i++;
-         if (i == (int)sizeof(tmp)) {
-                 mdoc->flags |= MDOC_HALT;
-                 return(mdoc_perr(mdoc, line, 1, "unknown macro"));
-         } else if (i <= 2) {
-                 mdoc->flags |= MDOC_HALT;
-                 return(mdoc_perr(mdoc, line, 1, "unknown macro"));
-         }
-         i--;
-         (void)memcpy(tmp, buf + 1, (size_t)i);
-         tmp[i++] = 0;
-         if (MDOC_MAX == (c = mdoc_find(mdoc, tmp))) {
-                 mdoc->flags |= MDOC_HALT;
-                 return(mdoc_perr(mdoc, line, 1, "unknown macro"));
-         }
-         while (buf[i] && isspace((int)buf[i]))
-                 i++;
-         if ( ! mdoc_macro(mdoc, c, line, 1, &i, buf)) {
-                 mdoc->flags |= MDOC_HALT;
-                 return(0);
-         }
-         return(1);
  }
  void
- mdoc_vmsg(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
+ mdoc_macro(MACRO_PROT_ARGS)
  {
-         char              buf[256];
+         assert(tok > TOKEN_NONE && tok < MDOC_MAX);
-         va_list           ap;
-         if (NULL == mdoc->cb.mdoc_msg)
+         if (mdoc->flags & MDOC_PBODY) {
-                 return;
+                 if (tok == MDOC_Dt) {
+                         mandoc_vmsg(MANDOCERR_DT_LATE,
-         va_start(ap, fmt);
+                             mdoc->parse, line, ppos,
-         (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
+                             "Dt %s", buf + *pos);
-         va_end(ap);
+                         return;
-         (*mdoc->cb.mdoc_msg)(mdoc->data, ln, pos, buf);
+                 }
+         } else if ( ! (mdoc_macros[tok].flags & MDOC_PROLOGUE)) {
+                 if (mdoc->meta.title == NULL) {
+                         mandoc_vmsg(MANDOCERR_DT_NOTITLE,
+                             mdoc->parse, line, ppos, "%s %s",
+                             mdoc_macronames[tok], buf + *pos);
+                         mdoc->meta.title = mandoc_strdup("UNTITLED");
+                 }
+                 if (NULL == mdoc->meta.vol)
+                         mdoc->meta.vol = mandoc_strdup("LOCAL");
+                 mdoc->flags |= MDOC_PBODY;
+         }
+         (*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf);
  }
+ void
- int
+ mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, int tok)
- mdoc_verr(struct mdoc *mdoc, int ln, int pos,
-                 const char *fmt, ...)
  {
-         char             buf[256];
+         struct roff_node *p;
-         va_list          ap;
-         if (NULL == mdoc->cb.mdoc_err)
+         p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok);
-                 return(0);
+         roff_node_append(mdoc, p);
+         mdoc->next = ROFF_NEXT_CHILD;
-         va_start(ap, fmt);
-         (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
-         va_end(ap);
-         return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
  }
+ struct roff_node *
- int
+ mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos, int tok,
- mdoc_vwarn(struct mdoc *mdoc, int ln, int pos,
+                 struct roff_node *body, enum mdoc_endbody end)
-                 enum mdoc_warn type, const char *fmt, ...)
  {
-         char             buf[256];
+         struct roff_node *p;
-         va_list          ap;
-         if (NULL == mdoc->cb.mdoc_warn)
+         body->flags |= MDOC_ENDED;
-                 return(0);
+         body->parent->flags |= MDOC_ENDED;
+         p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok);
-         va_start(ap, fmt);
+         p->body = body;
-         (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
+         p->norm = body->norm;
-         va_end(ap);
+         p->end = end;
-         return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, type, buf));
+         roff_node_append(mdoc, p);
+         mdoc->next = ROFF_NEXT_SIBLING;
+         return p;
  }
+ struct roff_node *
- int
+ mdoc_block_alloc(struct roff_man *mdoc, int line, int pos,
- mdoc_macro(struct mdoc *mdoc, int tok,
+         int tok, struct mdoc_arg *args)
-                 int ln, int ppos, int *pos, char *buf)
  {
+         struct roff_node *p;
-         assert(mdoc_macros[tok].fp);
+         p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok);
+         p->args = args;
+         if (p->args)
+                 (args->refcnt)++;
-         if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
+         switch (tok) {
-                         SEC_PROLOGUE != mdoc->lastnamed)
+         case MDOC_Bd:
-                 return(mdoc_perr(mdoc, ln, ppos, "macro disallowed in document body"));
-         if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
-                         SEC_PROLOGUE == mdoc->lastnamed)
-                 return(mdoc_perr(mdoc, ln, ppos, "macro disallowed in document prologue"));
-         if (1 != ppos && ! (MDOC_CALLABLE & mdoc_macros[tok].flags))
-                 return(mdoc_perr(mdoc, ln, ppos, "macro not callable"));
-         return((*mdoc_macros[tok].fp)(mdoc, tok, ln, ppos, pos, buf));
- }
- static int
- mdoc_node_append(struct mdoc *mdoc, struct mdoc_node *p)
- {
-         const char       *nn, *nt, *on, *ot, *act;
-         assert(mdoc->last);
-         assert(mdoc->first);
-         assert(MDOC_ROOT != p->type);
-         /* See if we exceed the suggest line-max. */
-         switch (p->type) {
-         case (MDOC_TEXT):
                  /* FALLTHROUGH */
-         case (MDOC_ELEM):
+         case MDOC_Bf:
                  /* FALLTHROUGH */
-         case (MDOC_BLOCK):
+         case MDOC_Bl:
-                 mdoc->linetok++;
+                 /* FALLTHROUGH */
+         case MDOC_En:
+                 /* FALLTHROUGH */
+         case MDOC_Rs:
+                 p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
                  break;
          default:
                  break;
          }
+         roff_node_append(mdoc, p);
+         mdoc->next = ROFF_NEXT_CHILD;
+         return p;
+ }
-         /* This sort-of works (re-opening of text macros...). */
+ void
-         if (mdoc->linetok > MDOC_LINEARG_SOFTMAX)
+ mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos,
-                 if ( ! mdoc_nwarn(mdoc, p, WARN_COMPAT,
+         int tok, struct mdoc_arg *args)
-                                         "suggested %d tokens per line exceeded (has %d)",
+ {
-                                         MDOC_LINEARG_SOFTMAX, mdoc->linetok))
+         struct roff_node *p;
-                         return(0);
-         if (MDOC_TEXT == mdoc->last->type)
+         p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok);
-                 on = "<text>";
+         p->args = args;
-         else if (MDOC_ROOT == mdoc->last->type)
+         if (p->args)
-                 on = "<root>";
+                 (args->refcnt)++;
-         else
-                 on = mdoc_macronames[mdoc->last->tok];
-         if (MDOC_TEXT == p->type)
+         switch (tok) {
-                 nn = "<text>";
+         case MDOC_An:
-         else if (MDOC_ROOT == p->type)
+                 p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
-                 nn = "<root>";
-         else
-                 nn = mdoc_macronames[p->tok];
-         ot = mdoc_type2a(mdoc->last->type);
-         nt = mdoc_type2a(p->type);
-         switch (mdoc->next) {
-         case (MDOC_NEXT_SIBLING):
-                 mdoc->last->next = p;
-                 p->prev = mdoc->last;
-                 p->parent = mdoc->last->parent;
-                 act = "sibling";
                  break;
-         case (MDOC_NEXT_CHILD):
-                 mdoc->last->child = p;
-                 p->parent = mdoc->last;
-                 act = "child";
-                 break;
          default:
-                 abort();
-                 /* NOTREACHED */
-         }
-         if ( ! mdoc_valid_pre(mdoc, p))
-                 return(0);
-         switch (p->type) {
-         case (MDOC_HEAD):
-                 assert(MDOC_BLOCK == p->parent->type);
-                 p->parent->data.block.head = p;
                  break;
-         case (MDOC_TAIL):
-                 assert(MDOC_BLOCK == p->parent->type);
-                 p->parent->data.block.tail = p;
-                 break;
-         case (MDOC_BODY):
-                 assert(MDOC_BLOCK == p->parent->type);
-                 p->parent->data.block.body = p;
-                 break;
-         default:
-                 break;
          }
+         roff_node_append(mdoc, p);
-         mdoc->last = p;
+         mdoc->next = ROFF_NEXT_CHILD;
-         mdoc_msg(mdoc, "parse: %s `%s' %s of %s `%s'",
-                         nt, nn, act, ot, on);
-         return(1);
  }
+ void
- static struct mdoc_node *
+ mdoc_node_relink(struct roff_man *mdoc, struct roff_node *p)
- mdoc_node_alloc(const struct mdoc *mdoc)
  {
-         struct mdoc_node *p;
-         p = xcalloc(1, sizeof(struct mdoc_node));
+         roff_node_unlink(mdoc, p);
-         p->sec = mdoc->lastsec;
+         roff_node_append(mdoc, p);
-         return(p);
  }
+ /*
- int
+  * Parse free-form text, that is, a line that does not begin with the
- mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok)
+  * control character.
+  */
+ static int
+ mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs)
  {
-         struct mdoc_node *p;
+         struct roff_node *n;
+         char             *c, *ws, *end;
-         assert(mdoc->first);
          assert(mdoc->last);
+         n = mdoc->last;
-         p = mdoc_node_alloc(mdoc);
+         /*
+          * Divert directly to list processing if we're encountering a
+          * columnar ROFFT_BLOCK with or without a prior ROFFT_BLOCK entry
+          * (a ROFFT_BODY means it's already open, in which case we should
+          * process within its context in the normal way).
+          */
-         p->line = line;
+         if (n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
-         p->pos = pos;
+             n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) {
-         p->type = MDOC_TAIL;
+                 /* `Bl' is open without any children. */
-         p->tok = tok;
+                 mdoc->flags |= MDOC_FREECOL;
+                 mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf);
+                 return 1;
+         }
-         return(mdoc_node_append(mdoc, p));
+         if (n->tok == MDOC_It && n->type == ROFFT_BLOCK &&
- }
+             NULL != n->parent &&
+             MDOC_Bl == n->parent->tok &&
+             LIST_column == n->parent->norm->Bl.type) {
+                 /* `Bl' has block-level `It' children. */
+                 mdoc->flags |= MDOC_FREECOL;
+                 mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf);
+                 return 1;
+         }
+         /*
+          * Search for the beginning of unescaped trailing whitespace (ws)
+          * and for the first character not to be output (end).
+          */
- int
+         /* FIXME: replace with strcspn(). */
- mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, int tok)
+         ws = NULL;
- {
+         for (c = end = buf + offs; *c; c++) {
-         struct mdoc_node *p;
+                 switch (*c) {
+                 case ' ':
+                         if (NULL == ws)
+                                 ws = c;
+                         continue;
+                 case '\t':
+                         /*
+                          * Always warn about trailing tabs,
+                          * even outside literal context,
+                          * where they should be put on the next line.
+                          */
+                         if (NULL == ws)
+                                 ws = c;
+                         /*
+                          * Strip trailing tabs in literal context only;
+                          * outside, they affect the next line.
+                          */
+                         if (MDOC_LITERAL & mdoc->flags)
+                                 continue;
+                         break;
+                 case '\\':
+                         /* Skip the escaped character, too, if any. */
+                         if (c[1])
+                                 c++;
+                         /* FALLTHROUGH */
+                 default:
+                         ws = NULL;
+                         break;
+                 }
+                 end = c + 1;
+         }
+         *end = '\0';
-         assert(mdoc->first);
+         if (ws)
-         assert(mdoc->last);
+                 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse,
+                     line, (int)(ws-buf), NULL);
-         p = mdoc_node_alloc(mdoc);
+         if (buf[offs] == '\0' && ! (mdoc->flags & MDOC_LITERAL)) {
+                 mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse,
+                     line, (int)(c - buf), NULL);
-         p->line = line;
+                 /*
-         p->pos = pos;
+                  * Insert a `sp' in the case of a blank line.  Technically,
-         p->type = MDOC_HEAD;
+                  * blank lines aren't allowed, but enough manuals assume this
-         p->tok = tok;
+                  * behaviour that we want to work around it.
+                  */
+                 roff_elem_alloc(mdoc, line, offs, MDOC_sp);
+                 mdoc->next = ROFF_NEXT_SIBLING;
+                 mdoc_valid_post(mdoc);
+                 return 1;
+         }
-         return(mdoc_node_append(mdoc, p));
+         roff_word_alloc(mdoc, line, offs, buf+offs);
- }
+         if (mdoc->flags & MDOC_LITERAL)
+                 return 1;
- int
+         /*
- mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, int tok)
+          * End-of-sentence check.  If the last character is an unescaped
- {
+          * EOS character, then flag the node as being the end of a
-         struct mdoc_node *p;
+          * sentence.  The front-end will know how to interpret this.
+          */
-         assert(mdoc->first);
+         assert(buf < end);
-         assert(mdoc->last);
-         p = mdoc_node_alloc(mdoc);
+         if (mandoc_eos(buf+offs, (size_t)(end-buf-offs)))
+                 mdoc->last->flags |= MDOC_EOS;
-         p->line = line;
+         return 1;
-         p->pos = pos;
-         p->type = MDOC_BODY;
-         p->tok = tok;
-         return(mdoc_node_append(mdoc, p));
  }
+ /*
- int
+  * Parse a macro line, that is, a line beginning with the control
- mdoc_root_alloc(struct mdoc *mdoc)
+  * character.
+  */
+ static int
+ mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs)
  {
-         struct mdoc_node *p;
+         struct roff_node *n;
+         const char       *cp;
+         int               tok;
+         int               i, sv;
+         char              mac[5];
-         p = mdoc_node_alloc(mdoc);
+         sv = offs;
-         p->type = MDOC_ROOT;
+         /*
+          * Copy the first word into a NUL-terminated buffer.
+          * Stop when a space, tab, escape, or eoln is encountered.
+          */
-         return(mdoc_node_append(mdoc, p));
+         i = 0;
- }
+         while (i < 4 && strchr(" \t\\", buf[offs]) == NULL)
+                 mac[i++] = buf[offs++];
+         mac[i] = '\0';
- int
+         tok = (i > 1 && i < 4) ? mdoc_hash_find(mac) : TOKEN_NONE;
- mdoc_block_alloc(struct mdoc *mdoc, int line, int pos,
-                 int tok, size_t argsz, const struct mdoc_arg *args)
- {
-         struct mdoc_node *p;
-         p = mdoc_node_alloc(mdoc);
+         if (tok == TOKEN_NONE) {
+                 mandoc_msg(MANDOCERR_MACRO, mdoc->parse,
+                     ln, sv, buf + sv - 1);
+                 return 1;
+         }
-         p->pos = pos;
+         /* Skip a leading escape sequence or tab. */
-         p->line = line;
-         p->type = MDOC_BLOCK;
-         p->tok = tok;
-         p->data.block.argc = argsz;
-         p->data.block.argv = argdup(argsz, args);
-         return(mdoc_node_append(mdoc, p));
+         switch (buf[offs]) {
- }
+         case '\\':
+                 cp = buf + offs + 1;
+                 mandoc_escape(&cp, NULL, NULL);
- int
+                 offs = cp - buf;
- mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos,
-                 int tok, size_t argsz, const struct mdoc_arg *args)
- {
-         struct mdoc_node *p;
-         p = mdoc_node_alloc(mdoc);
-         p->line = line;
-         p->pos = pos;
-         p->type = MDOC_ELEM;
-         p->tok = tok;
-         p->data.elem.argc = argsz;
-         p->data.elem.argv = argdup(argsz, args);
-         return(mdoc_node_append(mdoc, p));
- }
- int
- mdoc_word_alloc(struct mdoc *mdoc,
-                 int line, int pos, const char *word)
- {
-         struct mdoc_node *p;
-         p = mdoc_node_alloc(mdoc);
-         p->line = line;
-         p->pos = pos;
-         p->type = MDOC_TEXT;
-         p->data.text.string = xstrdup(word);
-         return(mdoc_node_append(mdoc, p));
- }
- static void
- argfree(size_t sz, struct mdoc_arg *p)
- {
-         int              i, j;
-         if (0 == sz)
-                 return;
-         assert(p);
-         /* LINTED */
-         for (i = 0; i < (int)sz; i++)
-                 if (p[i].sz > 0) {
-                         assert(p[i].value);
-                         /* LINTED */
-                         for (j = 0; j < (int)p[i].sz; j++)
-                                 free(p[i].value[j]);
-                         free(p[i].value);
-                 }
-         free(p);
- }
- static void
- mdoc_elem_free(struct mdoc_elem *p)
- {
-         argfree(p->argc, p->argv);
- }
- static void
- mdoc_block_free(struct mdoc_block *p)
- {
-         argfree(p->argc, p->argv);
- }
- static void
- mdoc_text_free(struct mdoc_text *p)
- {
-         if (p->string)
-                 free(p->string);
- }
- void
- mdoc_node_free(struct mdoc_node *p)
- {
-         switch (p->type) {
-         case (MDOC_TEXT):
-                 mdoc_text_free(&p->data.text);
                  break;
-         case (MDOC_ELEM):
+         case '\t':
-                 mdoc_elem_free(&p->data.elem);
+                 offs++;
                  break;
-         case (MDOC_BLOCK):
-                 mdoc_block_free(&p->data.block);
-                 break;
          default:
                  break;
          }
-         free(p);
+         /* Jump to the next non-whitespace word. */
- }
+         while (buf[offs] && ' ' == buf[offs])
+                 offs++;
- void
+         /*
- mdoc_node_freelist(struct mdoc_node *p)
+          * Trailing whitespace.  Note that tabs are allowed to be passed
- {
+          * into the parser as "text", so we only warn about spaces here.
+          */
-         if (p->child)
+         if ('\0' == buf[offs] && ' ' == buf[offs - 1])
-                 mdoc_node_freelist(p->child);
+                 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse,
-         if (p->next)
+                     ln, offs - 1, NULL);
-                 mdoc_node_freelist(p->next);
-         mdoc_node_free(p);
+         /*
- }
+          * If an initial macro or a list invocation, divert directly
+          * into macro processing.
+          */
+         if (NULL == mdoc->last || MDOC_It == tok || MDOC_El == tok) {
+                 mdoc_macro(mdoc, tok, ln, sv, &offs, buf);
+                 return 1;
+         }
- int
+         n = mdoc->last;
- mdoc_find(const struct mdoc *mdoc, const char *key)
+         assert(mdoc->last);
- {
-         return(mdoc_tokhash_find(mdoc->htab, key));
+         /*
- }
+          * If the first macro of a `Bl -column', open an `It' block
+          * context around the parsed macro.
+          */
+         if (n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
+             n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) {
+                 mdoc->flags |= MDOC_FREECOL;
+                 mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf);
+                 return 1;
+         }
- static void
+         /*
- argcpy(struct mdoc_arg *dst, const struct mdoc_arg *src)
+          * If we're following a block-level `It' within a `Bl -column'
- {
+          * context (perhaps opened in the above block or in ptext()),
-         int              i;
+          * then open an `It' block context around the parsed macro.
+          */
-         dst->line = src->line;
+         if (n->tok == MDOC_It && n->type == ROFFT_BLOCK &&
-         dst->pos = src->pos;
+             NULL != n->parent &&
-         dst->arg = src->arg;
+             MDOC_Bl == n->parent->tok &&
-         if (0 == (dst->sz = src->sz))
+             LIST_column == n->parent->norm->Bl.type) {
-                 return;
+                 mdoc->flags |= MDOC_FREECOL;
-         dst->value = xcalloc(dst->sz, sizeof(char *));
+                 mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf);
-         for (i = 0; i < (int)dst->sz; i++)
+                 return 1;
-                 dst->value[i] = xstrdup(src->value[i]);
+         }
- }
+         /* Normal processing of a macro. */
- static struct mdoc_arg *
+         mdoc_macro(mdoc, tok, ln, sv, &offs, buf);
- argdup(size_t argsz, const struct mdoc_arg *args)
- {
-         struct mdoc_arg *pp;
-         int              i;
-         if (0 == argsz)
+         /* In quick mode (for mandocdb), abort after the NAME section. */
-                 return(NULL);
-         pp = xcalloc((size_t)argsz, sizeof(struct mdoc_arg));
+         if (mdoc->quick && MDOC_Sh == tok &&
-         for (i = 0; i < (int)argsz; i++)
+             SEC_NAME != mdoc->last->sec)
-                 argcpy(&pp[i], &args[i]);
+                 return 2;
-         return(pp);
+         return 1;
  }
+ enum mdelim
- /* FIXME: deprecate. */
+ mdoc_isdelim(const char *p)
- char *
- mdoc_node2a(struct mdoc_node *node)
  {
-         static char      buf[64];
-         assert(node);
+         if ('\0' == p[0])
+                 return DELIM_NONE;
-         buf[0] = 0;
+         if ('\0' == p[1])
-         (void)xstrlcat(buf, mdoc_type2a(node->type), 64);
+                 switch (p[0]) {
-         if (MDOC_ROOT == node->type)
+                 case '(':
-                 return(buf);
+                         /* FALLTHROUGH */
-         (void)xstrlcat(buf, " `", 64);
+                 case '[':
-         if (MDOC_TEXT == node->type)
+                         return DELIM_OPEN;
-                 (void)xstrlcat(buf, node->data.text.string, 64);
+                 case '|':
-         else
+                         return DELIM_MIDDLE;
-                 (void)xstrlcat(buf, mdoc_macronames[node->tok], 64);
+                 case '.':
-         (void)xstrlcat(buf, "'", 64);
+                         /* FALLTHROUGH */
+                 case ',':
+                         /* FALLTHROUGH */
+                 case ';':
+                         /* FALLTHROUGH */
+                 case ':':
+                         /* FALLTHROUGH */
+                 case '?':
+                         /* FALLTHROUGH */
+                 case '!':
+                         /* FALLTHROUGH */
+                 case ')':
+                         /* FALLTHROUGH */
+                 case ']':
+                         return DELIM_CLOSE;
+                 default:
+                         return DELIM_NONE;
+                 }
-         return(buf);
+         if ('\\' != p[0])
- }
+                 return DELIM_NONE;
+         if (0 == strcmp(p + 1, "."))
+                 return DELIM_CLOSE;
+         if (0 == strcmp(p + 1, "fR|\\fP"))
+                 return DELIM_MIDDLE;
+         return DELIM_NONE;
+ }

CVSweb