pod2mdoc/pod2mdoc.c - diff

Return to pod2mdoc.c CVS log

Up to [cvsweb.bsd.lv] / pod2mdoc

Diff for /pod2mdoc/pod2mdoc.c between version 1.3 and 1.57

-version 1.3, 2014/03/20 15:18:56
+version 1.57, 2015/02/21 21:15:41
 Line 1
 Line 1
 Line 1
- /*      $Id$ */
+ /*      $Id$    */
  /*
   * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
+  * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
   *
   * Permission to use, copy, modify, and distribute this software for any
   * purpose with or without fee is hereby granted, provided that the above
-Line 26
+Line 27
 Line 26
 Line 27
  #include <string.h>
  #include <unistd.h>
+ #include "dict.h"
+ /*
+  * In what section can we find Perl module manuals?
+  * Sometimes (Mac OS X) it's 3pm, sometimes (OpenBSD, etc.) 3p.
+  * XXX IF YOU CHANGE THIS, CHANGE POD2MDOC.1 AS WELL.
+  */
+ #define PERL_SECTION    "3p"
  struct  args { /* override values handed to dofile() and readfile() */
          const char      *title; /* override "Dt" title */
          const char      *date; /* override "Dd" date */
          const char      *section; /* override "Dt" section */
  };
+ enum    list { /* list kinds; struct state keeps a stack of these for open lists */
+         LIST_BULLET = 0, /* presumably a bulleted list -- confirm against listguess() */
+         LIST_ENUM, /* presumably an enumerated list -- confirm */
+         LIST_TAG, /* presumably a tagged list -- confirm */
+         LIST__MAX /* last enumerator, named like CMD__MAX and FMT__MAX */
+ };
+ enum    sect { /* which section is being processed (stored in state.sect) */
+         SECT_NONE = 0, /* presumably any section other than the two below */
+         SECT_NAME, /* NAME section */
+         SECT_SYNOPSIS, /* SYNOPSIS section */
+ };
+ enum    outstate { /* progress of the current output line (stored in state.oust) */
+         OUST_NL = 0,    /* just started a new output line */
+         OUST_TXT,       /* text line output in progress */
+         OUST_MAC        /* macro line output in progress */
+ };
  struct  state {
+         const char      *fname; /* file being parsed */
          int              parsing; /* after =cut or before command */
          int              paused; /* in =begin and before =end */
+         enum sect        sect; /* which section are we in? */
+ #define LIST_STACKSZ     128
+         enum list        lstack[LIST_STACKSZ]; /* open lists */
+         size_t           lpos; /* where in list stack */
          int              haspar; /* in paragraph: do we need Pp? */
-         int              isname; /* are we the NAME section? */
+         enum outstate    oust; /* state of the mdoc output stream */
-         const char      *fname; /* file being parsed */
+         int              wantws; /* let mdoc(7) output whitespace here */
+         char            *outbuf; /* text buffered for output */
+         size_t           outbufsz; /* allocated size of outbuf */
+         size_t           outbuflen; /* current length of outbuf */
  };
  enum    fmt {
-Line 70  enum cmd {
+Line 107  enum cmd {
 Line 70  enum cmd {
 Line 107  enum cmd {
          CMD__MAX
  };
+ static void      command(struct state *, const char *, size_t, size_t);
+ static void      dofile(const struct args *, const char *,
+                         const struct tm *, char *, size_t);
+ static void      donamenm(struct state *, const char *, size_t *, size_t);
+ static void      dopar(struct state *, char *, size_t, size_t);
+ static void      dosynopsisfl(const char *, size_t *, size_t);
+ static int       dosynopsisop(struct state *, const char *, size_t *,
+                         size_t, size_t *);
+ static int       formatcode(struct state *, const char *, size_t *,
+                         size_t, int, int);
+ static void      formatcodeln(struct state *, const char *, const char *,
+                         size_t *, size_t, int);
+ static void      formatescape(struct state *, const char *, size_t *, size_t);
+ static int       hasmatch(const char *, size_t, size_t);
+ static void      ordinary(struct state *, const char *, size_t, size_t);
+ static void      outbuf_addchar(struct state *);
+ static void      outbuf_addstr(struct state *, const char *);
+ static void      outbuf_flush(struct state *);
+ static void      outbuf_grow(struct state *, size_t);
+ static enum list listguess(const char *, size_t, size_t);
+ static void      mdoc_newln(struct state *);
+ static int       readfile(const struct args *, const char *);
+ static void      register_type(const char *);
+ static int       trylink(const char *, size_t *, size_t, size_t);
+ static void      verbatim(struct state *, char *, size_t, size_t);
  static  const char *const cmds[CMD__MAX] = {
          "pod",          /* CMD_POD */
          "head1",        /* CMD_HEAD1 */
-Line 98  static const char fmts[FMT__MAX] = {
+Line 161  static const char fmts[FMT__MAX] = {
 Line 98  static const char fmts[FMT__MAX] = {
 Line 161  static const char fmts[FMT__MAX] = {
          'Z'             /* FMT_NULL */
  };
+ static  unsigned char   last; /* most recent character handled; outbuf_addchar() appends it */
+ /*
+  * Enlarge the output buffer of "st" so that more than "by" additional
+  * bytes fit; capacity is added in whole multiples of 128 bytes.
+  * If the memory cannot be obtained, report the error and exit.
+  */
+ static void
+ outbuf_grow(struct state *st, size_t by)
+ {
+         size_t   chunks;
+         /* Always at least one chunk, even when "by" is a multiple of 128. */
+         chunks = by / 128 + 1;
+         st->outbufsz += chunks * 128;
+         st->outbuf = realloc(st->outbuf, st->outbufsz);
+         if (NULL != st->outbuf)
+                 return;
+         perror(NULL);
+         exit(EXIT_FAILURE);
+ }
+ /*
+  * Append the character held in the file-scope variable "last" to the
+  * output buffer of "st", growing the buffer first when room is short.
+  * A backslash is followed by an 'e', so the buffer receives "\e".
+  * The buffer is left NUL-terminated.
+  */
+ static void
+ outbuf_addchar(struct state *st)
+ {
+         char    *cp;
+         /* Worst case: two characters plus the terminating NUL. */
+         if (st->outbufsz <= st->outbuflen + 2)
+                 outbuf_grow(st, 1);
+         /* Take the write position only after a possible realloc. */
+         cp = st->outbuf + st->outbuflen;
+         *cp++ = last;
+         st->outbuflen++;
+         if ('\\' == last) {
+                 *cp++ = 'e';
+                 st->outbuflen++;
+         }
+         *cp = '\0';
+ }
+ /*
+  * Append the NUL-terminated string "str" to the output buffer of "st",
+  * growing the buffer as needed, and record the final character of
+  * "str" in the file-scope variable "last".
+  * An empty "str" is a no-op: there is no final character to record,
+  * and indexing str[slen - 1] with slen == 0 would read out of bounds.
+  */
+ static void
+ outbuf_addstr(struct state *st, const char *str)
+ {
+         size_t   slen;
+         slen = strlen(str);
+         if (0 == slen)
+                 return;
+         if (st->outbuflen + slen >= st->outbufsz)
+                 outbuf_grow(st, slen);
+         /* Copy slen + 1 bytes so the terminating NUL comes along. */
+         memcpy(st->outbuf + st->outbuflen, str, slen+1);
+         st->outbuflen += slen;
+         last = str[slen - 1];
+ }
+ /*
+  * Write the buffered text of "st" to standard output and empty the
+  * buffer.  Does nothing when the buffer is empty.
+  * A separating space is printed first unless the output line has just
+  * started or whitespace is not wanted.  On a macro line, a leading
+  * double quote is written as the escape \(dq instead.
+  * A line that had just started is marked as a text line afterwards.
+  */
+ static void
+ outbuf_flush(struct state *st)
+ {
+         int      startline, quoted;
+         if ( ! st->outbuflen)
+                 return;
+         startline = OUST_NL == st->oust;
+         quoted = OUST_MAC == st->oust && '"' == st->outbuf[0];
+         if (st->wantws && ! startline)
+                 putchar(' ');
+         if (quoted)
+                 printf("\\(dq%s", st->outbuf + 1);
+         else
+                 fputs(st->outbuf, stdout);
+         st->outbuf[0] = '\0';
+         st->outbuflen = 0;
+         if (startline)
+                 st->oust = OUST_TXT;
+ }
+ /*
+  * End the current output line, unless a new line has only just been
+  * started.  Afterwards the state is OUST_NL, whitespace is wanted
+  * again, and "last" holds the newline that was written.
+  */
+ static void
+ mdoc_newln(struct state *st)
+ {
+         if (OUST_NL != st->oust) {
+                 last = '\n';
+                 putchar(last);
+                 st->wantws = 1;
+                 st->oust = OUST_NL;
+         }
+ }
  /*
   * Given buf[*start] is at the start of an escape name, read til the end
   * of the escape ('>') then try to do something with it.
   * Sets start to be one after the '>'.
+  *
+  * This function does not care about output modes,
+  * it merely appends text to the output buffer,
+  * which can then be used in any mode.
   */
  static void
- formatescape(const char *buf, size_t *start, size_t end)
+ formatescape(struct state *st, const char *buf, size_t *start, size_t end)
  {
          char             esc[16]; /* no more needed */
          size_t           i, max;
-Line 131  formatescape(const char *buf, size_t *start, size_t en
+Line 273  formatescape(const char *buf, size_t *start, size_t en
 Line 131  formatescape(const char *buf, size_t *start, size_t en
 Line 273  formatescape(const char *buf, size_t *start, size_t en
           * TODO: right now, we only recognise the named escapes.
           * Just let the rest of them go.
           */
          if (0 == strcmp(esc, "lt"))
-                 printf("\\(la");
+                 outbuf_addstr(st, "\\(la");
          else if (0 == strcmp(esc, "gt"))
-                 printf("\\(ra");
+                 outbuf_addstr(st, "\\(ra");
-         else if (0 == strcmp(esc, "vb"))
+         else if (0 == strcmp(esc, "verbar"))
-                 printf("\\(ba");
+                 outbuf_addstr(st, "\\(ba");
          else if (0 == strcmp(esc, "sol"))
-                 printf("\\(sl");
+                 outbuf_addstr(st, "\\(sl");
  }
  /*
-  * Skip space characters.
+  * Run some heuristics to intuit a link format.
+  * I set "start" to be the end of the sequence (last right angle bracket) so
+  * that the caller can safely just continue processing.
+  * If this is just an empty tag, I'll return 0.
+  *
+  * Always operates in OUST_MAC mode.
+  * Mode handling is done by the caller.
   */
+ static int
+ trylink(const char *buf, size_t *start, size_t end, size_t dsz)
+ {
+         /*
+          * buf:   input text; only indices below "end" are scanned.
+          * start: in: index of the first byte of the link content;
+          *        out: index of the closing '>' once one was found.
+          * dsz:   number of '<' that opened the code; if more than one,
+          *        the code closes with a space and that many '>'.
+          * Returns 1 if a macro was printed to standard output and 0
+          * if the link is empty or unterminated.
+          */
+         size_t           linkstart, realend, linkend,
+                          i, j, textsz, stack;
+         /*
+          * Scan to the start of the terminus.
+          * This function is more or less replicated in the formatcode()
+          * for null or index formatting codes.
+          * However, we're slightly different because we might have
+          * nested escapes we need to ignore.
+          */
+         stack = 0;
+         for (linkstart = realend = *start; realend < end; realend++) {
+                 if ('<' == buf[realend])
+                         stack++;
+                 if ('>' != buf[realend])
+                         continue;
+                 if (stack > 0) {
+                         /*
+                          * Closes a nested code.  Only decrement while
+                          * something is open: the counter is unsigned and
+                          * would otherwise wrap, making every later '>'
+                          * look nested.
+                          */
+                         stack--;
+                         continue;
+                 }
+                 if (dsz == 1)
+                         break;
+                 assert(realend > 0);
+                 if (' ' != buf[realend - 1])
+                         continue;
+                 for (i = realend, j = 0; i < end && j < dsz; j++)
+                         if ('>' != buf[i++])
+                                 break;
+                 if (dsz == j)
+                         break;
+         }
+         /* Ignore stubs. */
+         if (realend == end || realend == *start)
+                 return(0);
+         /* Set linkend to the end of content. */
+         linkend = dsz > 1 ? realend - 1 : realend;
+         /* Re-scan to see if we have a title or section. */
+         for (textsz = *start; textsz < linkend; textsz++)
+                 if ('|' == buf[textsz] || '/' == buf[textsz])
+                         break;
+         if (textsz < linkend && '|' == buf[textsz]) {
+                 /* With title: set start, then end at section. */
+                 linkstart = textsz + 1;
+                 textsz = textsz - *start;
+                 for (i = linkstart; i < linkend; i++)
+                         if ('/' == buf[i])
+                                 break;
+                 if (i < linkend)
+                         linkend = i;
+         } else if (textsz < linkend && '/' == buf[textsz]) {
+                 /* With section: set end at section. */
+                 linkend = textsz;
+                 textsz = 0;
+         } else
+                 /* No title, no section. */
+                 textsz = 0;
+         *start = realend;
+         j = linkend - linkstart;
+         /* Do we have only subsection material? */
+         if (0 == j && '/' == buf[linkend]) {
+                 linkstart = linkend + 1;
+                 linkend = dsz > 1 ? realend - 1 : realend;
+                 if (0 == (j = linkend - linkstart))
+                         return(0);
+                 printf("Sx %.*s", (int)j, &buf[linkstart]);
+                 return(1);
+         } else if (0 == j)
+                 return(0);
+         /*
+          * See if we qualify as being a link or not.
+          * Compare only the scheme prefix: using the whole link length
+          * would read beyond the end of the string literal.
+          */
+         if ((j >= 5 && 0 == memcmp("http:", &buf[linkstart], 5)) ||
+                 (j >= 6 && 0 == memcmp("https:", &buf[linkstart], 6)) ||
+                 (j >= 4 && 0 == memcmp("ftp:", &buf[linkstart], 4)) ||
+                 (j >= 5 && 0 == memcmp("sftp:", &buf[linkstart], 5)) ||
+                 (j >= 4 && 0 == memcmp("smb:", &buf[linkstart], 4)) ||
+                 (j >= 4 && 0 == memcmp("afs:", &buf[linkstart], 4))) {
+                 /*
+                  * Gross: "linkend" stops at the first '/', so print up
+                  * to the end of the content instead.
+                  */
+                 printf("Lk %.*s", (int)((dsz > 1 ? realend - 1 :
+                         realend) - linkstart), &buf[linkstart]);
+                 return(1);
+         }
+         /* See if we qualify as a mailto. */
+         if (j >= 7 && 0 == memcmp("mailto:", &buf[linkstart], 7)) {
+                 /* Nothing after the scheme: treat as an empty link. */
+                 if (7 == j)
+                         return(0);
+                 /* "Mt" takes the bare address, without the scheme. */
+                 printf("Mt %.*s", (int)(j - 7), &buf[linkstart + 7]);
+                 return(1);
+         }
+         /* See if we're a foo(5), foo(5x), or foo(5xx) manpage. */
+         if ((j > 3 && ')' == buf[linkend - 1]) &&
+                 ('(' == buf[linkend - 3])) {
+                 printf("Xr %.*s %c", (int)(j - 3),
+                         &buf[linkstart], buf[linkend - 2]);
+                 return(1);
+         } else if ((j > 4 && ')' == buf[linkend - 1]) &&
+                 ('(' == buf[linkend - 4])) {
+                 printf("Xr %.*s %.*s", (int)(j - 4),
+                         &buf[linkstart], 2, &buf[linkend - 3]);
+                 return(1);
+         } else if ((j > 5 && ')' == buf[linkend - 1]) &&
+                 ('(' == buf[linkend - 5])) {
+                 printf("Xr %.*s %.*s", (int)(j - 5),
+                         &buf[linkstart], 3, &buf[linkend - 4]);
+                 return(1);
+         }
+         /* Last try: do we have a double-colon? */
+         for (i = linkstart + 1; i < linkend; i++)
+                 if (':' == buf[i] && ':' == buf[i - 1])
+                         break;
+         if (i < linkend)
+                 printf("Xr %.*s " PERL_SECTION,
+                         (int)j, &buf[linkstart]);
+         else
+                 printf("Xr %.*s 1", (int)j, &buf[linkstart]);
+         return(1);
+ }
+ /*
+  * Doclifting: if we're a bold "-xx" and we're in the SYNOPSIS section,
+  * then it's likely that we're a flag.
+  * Our flag might be followed by an argument, so make sure that we're
+  * accounting for that, too.
+  * If we don't have a flag at all, however, then assume we're an "Ar".
+  *
+  * Always operates in OUST_MAC mode.
+  * Mode handling is done by the caller.
+  */
  static void
- skipspace(const char *buf, size_t *start, size_t end)
+ dosynopsisfl(const char *buf, size_t *start, size_t end)
  {
+         size_t   i;
+ again:
+         assert(*start + 1 < end);
+         assert('-' == buf[*start]);
-         while (*start < end && ' ' == buf[*start])
+         if ( ! isalnum((int)buf[*start + 1]) &&
-                 (*start)++;
+                 '?' != buf[*start + 1] &&
+                 '-' != buf[*start + 1]) {
+                 (*start)--;
+                 fputs("Ar", stdout);
+                 return;
+         }
+         (*start)++;
+         for (i = *start; i < end; i++)
+                 if (isalnum((int)buf[i]))
+                         continue;
+                 else if ('?' == buf[i])
+                         continue;
+                 else if ('-' == buf[i])
+                         continue;
+                 else if ('_' == buf[i])
+                         continue;
+                 else
+                         break;
+         assert(i < end);
+         if ( ! (' ' == buf[i] || '>' == buf[i])) {
+                 fputs("Ar", stdout);
+                 return;
+         }
+         printf("Fl ");
+         if (end - *start > 1 &&
+                 isupper((int)buf[*start]) &&
+                 islower((int)buf[*start + 1]) &&
+                 (end - *start == 2 ||
+                  ' ' == buf[*start + 2]))
+                 printf("\\&");
+         printf("%.*s", (int)(i - *start), &buf[*start]);
+         *start = i;
+         if (' ' == buf[i]) {
+                 while (i < end && ' ' == buf[i])
+                         i++;
+                 assert(i < end);
+                 if ('-' == buf[i]) {
+                         *start = i;
+                         goto again;
+                 }
+                 fputs("Ar", stdout);
+                 *start = i;
+         }
  }
  /*
-Line 157  skipspace(const char *buf, size_t *start, size_t end)
+Line 493  skipspace(const char *buf, size_t *start, size_t end)
 Line 157  skipspace(const char *buf, size_t *start, size_t end)
 Line 493  skipspace(const char *buf, size_t *start, size_t end)
   * like X<...> and can contain nested format codes.
   * This consumes the whole format code, and any nested format codes, til
   * the end of matched production.
-  * If "reentrant", then we're being called after a macro has already
+  * If "nomacro", then we don't print any macros, just contained data
-  * been printed to the current line.
+  * (e.g., following "Sh" or "Nm").
-  * "last" is set to the last read character: this is used to determine
+  * "pos" is only significant in SYNOPSIS, and should be 0 when invoked
-  * whether we should buffer with space or not.
+  * as the first format code on a line (for decoration as an "Nm"),
-  * If "nomacro", then we don't print any macros, just contained data.
+  * non-zero otherwise.
+  *
+  * Output mode handling is most complicated here.
+  * We may enter in any mode.
+  * We usually exit in OUST_MAC mode, except when
+  * entering without OUST_MAC and the code is invalid.
   */
  static int
- formatcode(const char *buf, size_t *start,
+ formatcode(struct state *st, const char *buf, size_t *start,
-         size_t end, int reentrant, int last, int nomacro)
+         size_t end, int nomacro, int pos)
  {
+         size_t           i, j, dsz;
          enum fmt         fmt;
+         unsigned char    uc;
+         int              gotmacro, wantws;
          assert(*start + 1 < end);
          assert('<' == buf[*start + 1]);
+         /*
+          * First, look up the format code.
+          * If it's not valid, treat it as a NOOP.
+          */
          for (fmt = 0; fmt < FMT__MAX; fmt++)
                  if (buf[*start] == fmts[fmt])
                          break;
-         /* Invalid macros are just regular text. */
+         /*
+          * Determine whether we're overriding our delimiter.
+          * According to POD, if we have more than one '<' followed by a
+          * space, then we need a space followed by matching '>' to close
+          * the expression.
+          * Otherwise we use the usual '<' and '>' matched pair.
+          */
+         i = *start + 1;
+         while (i < end && '<' == buf[i])
+                 i++;
+         assert(i > *start + 1);
+         dsz = i - (*start + 1);
+         if (dsz > 1 && (i >= end || ' ' != buf[i]))
+                 dsz = 1;
-         if (FMT__MAX == fmt) {
+         /* Remember, if dsz>1, to jump the trailing space. */
-                 putchar(buf[*start]);
+         *start += dsz + 1 + (dsz > 1 ? 1 : 0);
-                 (*start)++;
-                 return(0);
-         }
-         *start += 2;
          /*
-          * Escapes don't print macro sequences, so just output them like
+          * Escapes and ignored codes (NULL and INDEX) don't print macro
-          * normal text before processing for macros.
+          * sequences, so just output them like normal text before
+          * processing for real macros.
           */
          if (FMT_ESCAPE == fmt) {
-                 formatescape(buf, start, end);
+                 formatescape(st, buf, start, end);
                  return(0);
          } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
-                 /* For indices and nulls, just consume. */
+                 /*
-                 while (*start < end && '>' != buf[*start])
+                  * Just consume til the end delimiter, accounting for
+                  * whether it's a custom one.
+                  */
+                 for ( ; *start < end; (*start)++) {
+                         if ('>' != buf[*start])
+                                 continue;
+                         else if (dsz == 1)
+                                 break;
+                         assert(*start > 0);
+                         if (' ' != buf[*start - 1])
+                                 continue;
+                         i = *start;
+                         for (j = 0; i < end && j < dsz; j++)
+                                 if ('>' != buf[i++])
+                                         break;
+                         if (dsz != j)
+                                 continue;
+                         (*start) += dsz;
+                         break;
+                 }
+                 if (*start < end) {
+                         assert('>' == buf[*start]);
                          (*start)++;
-                 if (*start < end)
+                 }
-                         (*start)++;
+                 if (isspace(last))
+                         while (*start < end && isspace((int)buf[*start]))
+                                 (*start)++;
                  return(0);
          }
-         if ( ! nomacro) {
+         /*
+          * Check whether we're supposed to print macro stuff (this is
+          * suppressed in, e.g., "Nm" and "Sh" macros).
+          */
+         if (FMT__MAX != fmt && !nomacro) {
                  /*
-                  * Print out the macro describing this format code.
+                  * Do we need spacing before the upcoming macro,
-                  * If we're not "reentrant" (not yet on a macro line)
+                  * after any pending text already in the outbuf?
-                  * then print a newline, if necessary, and the macro
+                  * We may already have wantws if there was whitespace
-                  * indicator.
+                  * before the code ("text B<text"), or there may be
-                  * Otherwise, offset us with a space.
+                  * whitespace inside our scope ("textB< text").
                   */
-                 if ( ! reentrant && last != '\n')
-                         putchar('\n');
+                 wantws = ' ' == buf[*start] ||
-                 if ( ! reentrant)
+                     (st->wantws && ! st->outbuflen);
+                 /*
+                  * If we are on a text line and there is no
+                  * whitespace before our content, we have to make
+                  * the previous word a prefix to the macro line.
+                  */
+                 if (OUST_MAC != st->oust && ! wantws) {
+                         if (OUST_NL != st->oust)
+                                 mdoc_newln(st);
+                         fputs(".Pf", stdout);
+                         st->oust = OUST_MAC;
+                         st->wantws = wantws = 1;
+                 }
+                 outbuf_flush(st);
+                 /* Whitespace is easier to suppress on macro lines. */
+                 if (OUST_MAC == st->oust && ! wantws)
+                         printf(" Ns");
+                 /* Unless we are on a macro line, start one. */
+                 if (OUST_MAC != st->oust) {
+                         if (OUST_NL != st->oust)
+                                 mdoc_newln(st);
                          putchar('.');
-                 else
+                         st->oust = OUST_MAC;
+                 } else
                          putchar(' ');
+                 st->wantws = 1;
                  /*
-                  * If we don't have whitespace before us, then suppress
+                  * Print the macro corresponding to this format code,
-                  * macro whitespace with Ns.
+                  * and update the output state afterwards.
                   */
-                 if (' ' != last)
-                         printf("Ns ");
                  switch (fmt) {
                  case (FMT_ITALIC):
-                         printf("Em ");
+                         fputs("Em", stdout);
                          break;
                  case (FMT_BOLD):
-                         printf("Sy ");
+                         if (SECT_SYNOPSIS == st->sect) {
+                                 if (1 == dsz && '-' == buf[*start])
+                                         dosynopsisfl(buf, start, end);
+                                 else if (0 == pos)
+                                         fputs("Nm", stdout);
+                                 else
+                                         fputs("Ar", stdout);
+                                 break;
+                         }
+                         i = 0;
+                         uc = buf[*start];
+                         while (isalnum(uc) || '_' == uc || ' ' == uc)
+                                 uc = buf[*start + ++i];
+                         if ('=' != uc && '>' != uc)
+                                 i = 0;
+                         if (4 == i && ! strncmp(buf + *start, "NULL", 4)) {
+                                 fputs("Dv", stdout);
+                                 break;
+                         }
+                         switch (i ? dict_get(buf + *start, i) : MDOC_MAX) {
+                         case MDOC_Fa:
+                                 fputs("Fa", stdout);
+                                 break;
+                         case MDOC_Vt:
+                                 fputs("Vt", stdout);
+                                 break;
+                         default:
+                                 fputs("Sy", stdout);
+                                 break;
+                         }
                          break;
                  case (FMT_CODE):
-                         printf("Qo Li ");
+                         fputs("Qo Li", stdout);
                          break;
                  case (FMT_LINK):
-                         printf("Lk ");
+                         /* Try to link; use "No" if it's empty. */
+                         if ( ! trylink(buf, start, end, dsz))
+                                 fputs("No", stdout);
                          break;
                  case (FMT_FILE):
-                         printf("Pa ");
+                         fputs("Pa", stdout);
                          break;
                  case (FMT_NBSP):
-                         /* TODO. */
+                         fputs("No", stdout);
-                         printf("No ");
                          break;
                  default:
                          abort();
                  }
+         } else {
+                 outbuf_flush(st);
+                 st->wantws = 0;
          }
          /*
-          * Read until we reach the end market ('>') or until we find a
+          * Process until we reach the end marker (e.g., '>') or until we
-          * nested format code.
+          * find a nested format code.
           * Don't emit any newlines: since we're on a macro line, we
           * don't want to break the line.
           */
+         gotmacro = 0;
          while (*start < end) {
-                 if ('>' == buf[*start]) {
+                 if ('>' == buf[*start] && 1 == dsz) {
                          (*start)++;
                          break;
+                 } else if ('>' == buf[*start] &&
+                                 ' ' == buf[*start - 1]) {
+                         /*
+                          * Handle custom delimiters.
+                          * These require a certain number of
+                          * space-preceded angle brackets before we're really at
+                          * the end.
+                          */
+                         i = *start;
+                         for (j = 0; i < end && j < dsz; j++)
+                                 if ('>' != buf[i++])
+                                         break;
+                         if (dsz == j) {
+                                 *start += dsz;
+                                 break;
+                         }
                  }
-                 if (*start + 1 < end && '<' == buf[*start + 1]) {
+                 if (*start + 1 < end && '<' == buf[*start + 1] &&
-                         formatcode(buf, start, end, 1, last, nomacro);
+                     'A' <= buf[*start] && 'Z' >= buf[*start]) {
+                         gotmacro = formatcode(st, buf,
+                             start, end, nomacro, 1);
                          continue;
                  }
+                 /* Suppress newlines and multiple spaces. */
+                 last = buf[(*start)++];
+                 if (isspace(last)) {
+                         outbuf_flush(st);
+                         st->wantws = 1;
+                         gotmacro = 0;
+                         while (*start < end &&
+                             isspace((unsigned char)buf[*start]))
+                                 (*start)++;
+                         continue;
+                 }
+                 if (OUST_MAC == st->oust && FMT__MAX != fmt) {
+                         if (gotmacro && ! st->wantws) {
+                                 printf(" Ns");
+                                 st->wantws = 1;
+                         }
+                         gotmacro = 0;
                          /*
-                          * Make sure that any macro-like words (or
+                          * Escape macro-like words.
-                          * really any word starting with a capital
+                          * This matches "Xx " and "XxEOLN".
-                          * letter) is assumed to be a macro that must be
-                          * escaped.
-                          * XXX: should this be isalpha()?
                           */
-                         if ((' ' == last || '\n' == last) &&
-                                 isupper(buf[*start]))
-                                 printf("\\&");
-                 last = buf[*start];
+                         if (*start < end && ! st->outbuflen &&
-                 if ('\n' == last)
+                             isupper(last) &&
-                         last = ' ';
+                             islower((unsigned char)buf[*start]) &&
-                 putchar(last);
+                             (end - *start == 1 ||
+                              ' ' == buf[*start + 1] ||
-                 (*start)++;
+                              '>' == buf[*start + 1]))
+                                 outbuf_addstr(st, "\\&");
+                         last = buf[*start - 1];
+                 }
+                 outbuf_addchar(st);
          }
-         if ( ! nomacro && FMT_CODE == fmt)
+         if (FMT__MAX == fmt)
-                 printf(" Qc ");
+                 return(0);
-         if (reentrant)
+         outbuf_flush(st);
-                 return(1);
-         /*
+         if ( ! nomacro && FMT_CODE == fmt)
-          * If we're not reentrant, we want to put ending punctuation on
+                 fputs(" Qc", stdout);
-          * the macro line so that it's properly handled by being
-          * smooshed against the terminal word.
+         st->wantws = ' ' == last;
-          */
-         skipspace(buf, start, end);
-         if (',' != buf[*start] && '.' != buf[*start] &&
-                 '!' != buf[*start] && '?' != buf[*start] &&
-                 ')' != buf[*start])
-                 return(1);
-         while (*start < end) {
-                 if (',' != buf[*start] &&
-                         '.' != buf[*start] &&
-                         '!' != buf[*start] &&
-                         '?' != buf[*start] &&
-                         ')' != buf[*start])
-                         break;
-                 putchar(' ');
-                 putchar(buf[*start]);
-                 (*start)++;
-         }
-         skipspace(buf, start, end);
          return(1);
  }
  /*
   * Calls formatcode() til the end of a paragraph.
+  * Goes to OUST_MAC mode and stays there when returning,
+  * such that the caller can add arguments to the macro line
+  * before closing it out.
   */
  static void
- formatcodeln(const char *buf, size_t *start, size_t end, int nomacro)
+ formatcodeln(struct state *st, const char *linemac,
+         const char *buf, size_t *start, size_t end, int nomacro)
  {
-         int              last;
+         int      gotmacro;
-         last = '\n';
+         assert(OUST_NL == st->oust);
+         assert(st->wantws);
+         printf(".%s", linemac);
+         st->oust = OUST_MAC;
+         gotmacro = 0;
          while (*start < end)  {
-                 if (*start + 1 < end && '<' == buf[*start + 1]) {
+                 if (*start + 1 < end && '<' == buf[*start + 1] &&
-                         formatcode(buf, start, end, 1, last, nomacro);
+                     'A' <= buf[*start] && 'Z' >= buf[*start]) {
+                         gotmacro = formatcode(st, buf,
+                             start, end, nomacro, 1);
                          continue;
                  }
-                 if ('\n' != buf[*start])
-                         putchar(last = buf[*start]);
+                 /* Suppress newlines and multiple spaces. */
-                 (*start)++;
+                 last = buf[(*start)++];
+                 if (isspace(last)) {
+                         outbuf_flush(st);
+                         st->wantws = 1;
+                         while (*start < end &&
+                             isspace((unsigned char)buf[*start]))
+                                 (*start)++;
+                         continue;
+                 }
+                 if (gotmacro) {
+                         if (*start < end) {
+                                 if (st->wantws)
+                                         printf(" No");
+                                 else
+                                         printf(" Ns");
+                         }
+                         st->wantws = 1;
+                         gotmacro = 0;
+                 }
+                 /*
+                  * Since we're already on a macro line, we want to make
+                  * sure that we don't inadvertently invoke a macro.
+                  * We need to do this carefully because section names
+                  * are used in troff and we don't want to escape
+                  * something that needn't be escaped.
+                  */
+                 if (*start < end && ! st->outbuflen && isupper(last) &&
+                     islower((unsigned char)buf[*start]) &&
+                     (end - *start == 1 || ' ' == buf[*start + 1])) {
+                         outbuf_addstr(st, "\\&");
+                         last = buf[*start - 1];
+                 }
+                 outbuf_addchar(st);
          }
+         outbuf_flush(st);
+         st->wantws = 1;
  }
  /*
+  * Guess at what kind of list we are.
+  * These are taken straight from the POD manual.
+  * I don't know what people do in real life.
+  *
+  * buf[start..end) is the text of the =item being inspected.
+  * Returns LIST_BULLET for a lone "*", LIST_ENUM for "1." or a
+  * lone "1", and LIST_TAG for anything else (including no text).
+  */
+ static enum list
+ listguess(const char *buf, size_t start, size_t end)
+ {
+         size_t           len = end - start;
+         assert(end >= start);
+         if (len == 1 && '*' == buf[start])
+                 return(LIST_BULLET);
+         if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
+                 return(LIST_ENUM);
+         else if (len == 1 && '1' == buf[start])
+                 return(LIST_ENUM);
+         else
+                 return(LIST_TAG);
+ }
+ /*
   * A command paragraph, as noted in the perlpod manual, just indicates
   * that we should do something, optionally with some text to print as
   * well.
+  * From the perspective of external callers,
+  * always stays in OUST_NL/wantws mode,
+  * but its children do use OUST_MAC.
   */
  static void
  command(struct state *st, const char *buf, size_t start, size_t end)
-Line 363  command(struct state *st, const char *buf, size_t star
+Line 901  command(struct state *st, const char *buf, size_t star
 Line 363  command(struct state *st, const char *buf, size_t star
 Line 901  command(struct state *st, const char *buf, size_t star
                  return;
          start += csz;
-         skipspace(buf, &start, end);
+         while (start < end && ' ' == buf[start])
+                 start++;
          len = end - start;
          if (st->paused) {
-Line 379  command(struct state *st, const char *buf, size_t star
+Line 919  command(struct state *st, const char *buf, size_t star
 Line 379  command(struct state *st, const char *buf, size_t star
 Line 919  command(struct state *st, const char *buf, size_t star
                   * The behaviour of head= follows from a quick glance at
                   * how pod2man handles it.
                   */
-                 printf(".Sh ");
+                 st->sect = SECT_NONE;
-                 st->isname = 0;
+                 if (end - start == 4) {
-                 if (end - start == 4)
                          if (0 == memcmp(&buf[start], "NAME", 4))
-                                 st->isname = 1;
+                                 st->sect = SECT_NAME;
-                 formatcodeln(buf, &start, end, 1);
+                 } else if (end - start == 8) {
-                 putchar('\n');
+                         if (0 == memcmp(&buf[start], "SYNOPSIS", 8))
+                                 st->sect = SECT_SYNOPSIS;
+                 }
+                 formatcodeln(st, "Sh", buf, &start, end, 1);
+                 mdoc_newln(st);
                  st->haspar = 1;
                  break;
          case (CMD_HEAD2):
-                 printf(".Ss ");
+                 formatcodeln(st, "Ss", buf, &start, end, 1);
-                 formatcodeln(buf, &start, end, 1);
+                 mdoc_newln(st);
-                 putchar('\n');
                  st->haspar = 1;
                  break;
          case (CMD_HEAD3):
                  puts(".Pp");
-                 printf(".Em ");
+                 formatcodeln(st, "Em", buf, &start, end, 0);
-                 formatcodeln(buf, &start, end, 0);
+                 mdoc_newln(st);
-                 putchar('\n');
                  puts(".Pp");
                  st->haspar = 1;
                  break;
          case (CMD_HEAD4):
                  puts(".Pp");
-                 printf(".No ");
+                 formatcodeln(st, "No", buf, &start, end, 0);
-                 formatcodeln(buf, &start, end, 0);
+                 mdoc_newln(st);
-                 putchar('\n');
                  puts(".Pp");
                  st->haspar = 1;
                  break;
          case (CMD_OVER):
                  /*
-                  * TODO: we should be doing this after we process the
+                  * If we have an existing list that hasn't had an =item
-                  * first =item to see whether we'll do an -enum,
+                  * yet, then make sure that we open it now.
-                  * -bullet, or something else.
+                  * We use the default list type, but that can't be
+                  * helped (we haven't seen any items yet).
                   */
-                 puts(".Bl -tag -width Ds");
+                 if (st->lpos > 0)
+                         if (LIST__MAX == st->lstack[st->lpos - 1]) {
+                                 st->lstack[st->lpos - 1] = LIST_TAG;
+                                 puts(".Bl -tag -width Ds");
+                         }
+                 st->lpos++;
+                 assert(st->lpos < LIST_STACKSZ);
+                 st->lstack[st->lpos - 1] = LIST__MAX;
                  break;
          case (CMD_ITEM):
-                 printf(".It ");
+                 if (0 == st->lpos) {
-                 formatcodeln(buf, &start, end, 0);
+                         /*
-                 putchar('\n');
+                          * Bad markup.
+                          * Try to compensate.
+                          */
+                         st->lstack[st->lpos] = LIST__MAX;
+                         st->lpos++;
+                 }
+                 assert(st->lpos > 0);
+                 /*
+                  * If we're the first =item, guess at what our content
+                  * will be: "*" is a bullet list, "1." is a numbered
+                  * list, and everything else is tagged.
+                  */
+                 if (LIST__MAX == st->lstack[st->lpos - 1]) {
+                         st->lstack[st->lpos - 1] =
+                                 listguess(buf, start, end);
+                         switch (st->lstack[st->lpos - 1]) {
+                         case (LIST_BULLET):
+                                 puts(".Bl -bullet");
+                                 break;
+                         case (LIST_ENUM):
+                                 puts(".Bl -enum");
+                                 break;
+                         default:
+                                 puts(".Bl -tag -width Ds");
+                                 break;
+                         }
+                 }
+                 switch (st->lstack[st->lpos - 1]) {
+                 case (LIST_TAG):
+                         formatcodeln(st, "It", buf, &start, end, 0);
+                         mdoc_newln(st);
+                         break;
+                 case (LIST_ENUM):
+                         /* FALLTHROUGH */
+                 case (LIST_BULLET):
+                         /*
+                          * Abandon the remainder of the paragraph
+                          * because we're going to be a bulleted or
+                          * numbered list.
+                          */
+                         puts(".It");
+                         break;
+                 default:
+                         abort();
+                 }
                  st->haspar = 1;
                  break;
          case (CMD_BACK):
-                 puts(".El");
+                 /* Make sure we don't back over the stack. */
+                 if (st->lpos > 0) {
+                         st->lpos--;
+                         puts(".El");
+                 }
                  break;
          case (CMD_BEGIN):
                  /*
-Line 454  command(struct state *st, const char *buf, size_t star
+Line 1050  command(struct state *st, const char *buf, size_t star
 Line 454  command(struct state *st, const char *buf, size_t star
 Line 1050  command(struct state *st, const char *buf, size_t star
  }
  /*
+  * Put the type provided as an argument into the dictionary.
+  *
+  * ptype points at the type string; the scans below have no length
+  * bound, so the string must contain a terminating byte that is
+  * neither alphanumeric nor '_' (e.g. the NUL the caller writes).
+  * If the first word is exactly "struct" or "enum", the identifier
+  * following it (when present) is registered on its own, and then
+  * the text from the keyword through that identifier is registered
+  * as well; otherwise only the first word is registered.
+  * Every entry is stored with dict_put() as MDOC_Vt.
+  */
+ static void
+ register_type(const char *ptype)
+ {
+         const char      *pname, *pend;
+         pname = ptype;
+         while (isalnum((unsigned char)*pname) || '_' == *pname)
+                 pname++;
+         if ((pname - ptype == 6 && ! strncmp(ptype, "struct", 6)) ||
+             (pname - ptype == 4 && ! strncmp(ptype, "enum", 4))) {
+                 while (' ' == *pname)
+                         pname++;
+                 pend = pname;
+                 while (isalnum((unsigned char)*pend) || '_' == *pend)
+                         pend++;
+                 if (pend > pname)
+                         dict_put(pname, pend - pname, MDOC_Vt);
+         } else
+                 pend = pname;
+         if (pend > ptype)
+                 dict_put(ptype, pend - ptype, MDOC_Vt);
+ }
+ /*
   * Just pump out the line in a verbatim block.
+  * From the perspective of external callers,
+  * always stays in OUST_NL/wantws mode.
+  *
+  * Does nothing while paused or not parsing.
+  * The paragraph is buf[start..end).  buf is writable because the
+  * SYNOPSIS handling below cuts the text into NUL-terminated pieces
+  * in place.
+  * Outside the SYNOPSIS the text is emitted in ".Bd -literal" /
+  * ".Ed" displays; a leading '.' or '\'' on a line is protected
+  * with "\&" and each backslash is followed by "e".
+  * Inside the SYNOPSIS, "#include <...>" lines become ".In", other
+  * lines starting with '#' become ".Fd" (names from #define are
+  * stored with dict_put()), and function declarations become
+  * ".Ft"/".Fo"/".Fa"/".Fc" with the types, function name and
+  * argument names stored in the dictionary.
+  * Anything else in the SYNOPSIS falls through to the literal display.
   */
  static void
- verbatim(struct state *st, const char *buf, size_t start, size_t end)
+ verbatim(struct state *st, char *buf, size_t start, size_t end)
  {
+         size_t           i, ift, ifo, ifa, ifc, inl;
+         char            *cp, *cp2;
+         int              indisplay, nopen, wantsp;
-         if ( ! st->parsing || st->paused)
+         if (st->paused || ! st->parsing)
                  return;
-         puts(".Bd -literal");
+         indisplay = wantsp = 0;
-         printf("%.*s\n", (int)(end - start), &buf[start]);
-         puts(".Ed");
+ again:
+         if (start == end) {
+                 if (indisplay)
+                         puts(".Ed");
+                 return;
+         }
+         if ('\n' == buf[start]) {
+                 wantsp = 1;
+                 start++;
+                 goto again;
+         }
+         /*
+          * If we're in the SYNOPSIS, see if we're an #include block.
+          * If we are, then print the "In" macro and re-loop.
+          * This handles any number of inclusions, but only when they
+          * come before the remaining parts...
+          */
+         if (SECT_SYNOPSIS == st->sect) {
+                 i = start;
+                 while (i < end && buf[i] == ' ')
+                         i++;
+                 if (i == end) {
+                         /*
+                          * Only blanks remain.  Consume them so that
+                          * the restart makes progress and closes any
+                          * open display instead of looping forever.
+                          */
+                         start = end;
+                         goto again;
+                 }
+                 /* We're an include block! */
+                 if (end - i > 10 &&
+                         0 == memcmp(&buf[i], "#include <", 10)) {
+                         start = i + 10;
+                         while (start < end && ' ' == buf[start])
+                                 start++;
+                         if (indisplay)
+                                 puts(".Ed");
+                         indisplay = wantsp = 0;
+                         fputs(".In ", stdout);
+                         /* Stop til the '>' marker or we hit eoln. */
+                         while (start < end &&
+                                 '>' != buf[start] && '\n' != buf[start])
+                                 putchar(buf[start++]);
+                         putchar('\n');
+                         if (start < end && '>' == buf[start])
+                                 start++;
+                         if (start < end && '\n' == buf[start])
+                                 start++;
+                         goto again;
+                 }
+                 /* Other preprocessor directives. */
+                 if ('#' == buf[i]) {
+                         if (indisplay)
+                                 puts(".Ed");
+                         indisplay = wantsp = 0;
+                         fputs(".Fd ", stdout);
+                         start = i;
+                         while(start < end && '\n' != buf[start])
+                                 putchar(buf[start++]);
+                         putchar('\n');
+                         if (start < end && '\n' == buf[start])
+                                 start++;
+                         /* Remember #define for Dv or Fn. */
+                         if (strncmp(buf + i + 1, "define", 6) ||
+                             ! isspace((unsigned char)buf[i + 7]))
+                                 goto again;
+                         ifo = i + 7;
+                         while (ifo < start &&
+                             isspace((unsigned char)buf[ifo]))
+                                 ifo++;
+                         ifa = ifo;
+                         while ('_' == buf[ifa] ||
+                             isalnum((unsigned char)buf[ifa]))
+                                 ifa++;
+                         dict_put(buf + ifo, ifa - ifo,
+                             '(' == buf[ifa] ? MDOC_Fo : MDOC_Dv);
+                         goto again;
+                 }
+                 /*
+                  * Parse function declaration.
+                  * ift: start of the return type,
+                  * ifo: last blank seen before the opening parenthesis,
+                  * ifa: the opening parenthesis,
+                  * ifc: the matching closing parenthesis,
+                  * inl: the newline after the declaration (or end).
+                  */
+                 ifo = ifa = ifc = 0;
+                 inl = end;
+                 nopen = 0;
+                 for (ift = i; i < end; i++) {
+                         if (ifc) {
+                                 if (buf[i] != '\n')
+                                         continue;
+                                 inl = i;
+                                 break;
+                         }
+                         switch (buf[i]) {
+                         case '\t':
+                                 /* FALLTHROUGH */
+                         case ' ':
+                                 if ( ! ifa)
+                                         ifo = i;
+                                 break;
+                         case '(':
+                                 if (ifo) {
+                                         nopen++;
+                                         if ( ! ifa)
+                                                 ifa = i;
+                                 } else
+                                         i = end;
+                                 break;
+                         case ')':
+                                 switch (nopen) {
+                                 case 0:
+                                         i = end;
+                                         break;
+                                 case 1:
+                                         ifc = i;
+                                         break;
+                                 default:
+                                         nopen--;
+                                         break;
+                                 }
+                                 break;
+                         default:
+                                 break;
+                         }
+                 }
+                 /* Encode function declaration. */
+                 if (ifc) {
+                         for (i = ifa; i < ifc; i++)
+                                 if (buf[i] == '\n')
+                                         buf[i] = ' ';
+                         buf[ifo++] = '\0';
+                         register_type(buf + ift);
+                         if (indisplay)
+                                 puts(".Ed");
+                         indisplay = wantsp = 0;
+                         printf(".Ft %s", buf + ift);
+                         if (buf[ifo] == '*') {
+                                 fputs(" *", stdout);
+                                 ifo++;
+                         }
+                         putchar('\n');
+                         buf[ifa++] = '\0';
+                         printf(".Fo %s\n", buf + ifo);
+                         dict_put(buf + ifo, 0, MDOC_Fo);
+                         buf[ifc++] = '\0';
+                         /* One ".Fa" per comma-separated argument. */
+                         for (;;) {
+                                 cp = strchr(buf + ifa, ',');
+                                 if (cp != NULL) {
+                                         cp2 = cp;
+                                         *cp++ = '\0';
+                                 } else
+                                         cp2 = strchr(buf + ifa, '\0');
+                                 while (isalnum((unsigned char)cp2[-1]) ||
+                                     '_' == cp2[-1])
+                                         cp2--;
+                                 if ('\0' != *cp2)
+                                         dict_put(cp2, 0, MDOC_Fa);
+                                 register_type(buf + ifa);
+                                 if (strchr(buf + ifa, ' ') == NULL)
+                                         printf(".Fa %s\n", buf + ifa);
+                                 else
+                                         printf(".Fa \"%s\"\n", buf + ifa);
+                                 if (cp == NULL)
+                                         break;
+                                 while (*cp == ' ' || *cp == '\t')
+                                         cp++;
+                                 ifa = cp - buf;
+                         }
+                         puts(".Fc");
+                         if (buf[ifc] == ';')
+                                 ifc++;
+                         if (ifc < inl) {
+                                 buf[inl] = '\0';
+                                 puts(buf + ifc);
+                         }
+                         start = inl < end ? inl + 1 : end;
+                         goto again;
+                 }
+         }
+         if ( ! indisplay)
+                 puts(".Bd -literal");
+         else if (wantsp)
+                 putchar('\n');
+         indisplay = 1;
+         wantsp = 0;
+         for (last = '\n'; start < end; start++) {
+                 /*
+                  * Handle accidental macros (newline starting with
+                  * control character) and escapes.
+                  */
+                 if ('\n' == last) {
+                         if ('\n' == buf[start])
+                                 goto again;
+                         if ('.' == buf[start] || '\'' == buf[start])
+                                 printf("\\&");
+                 }
+                 putchar(last = buf[start]);
+                 if ('\\' == buf[start])
+                         printf("e");
+         }
+         if ('\n' != last)
+                 putchar('\n');
+         if (indisplay)
+                 puts(".Ed");
  }
  /*
+  * See dosynopsisop().
+  *
+  * Scans buf[start..end), counting '[' as one level deeper and ']'
+  * as one level back.
+  * Returns 1 as soon as a ']' is met while no '[' from this range
+  * is still open, and 0 if the range ends without one.
+  * dosynopsisop() calls this with start just past an opening '[',
+  * so a result of 1 means that bracket has a closing partner.
+  */
+ static int
+ hasmatch(const char *buf, size_t start, size_t end)
+ {
+         size_t   stack;
+         for (stack = 0; start < end; start++)
+                 if (buf[start] == '[')
+                         stack++;
+                 else if (buf[start] == ']' && 0 == stack)
+                         return(1);
+                 else if (buf[start] == ']')
+                         stack--;
+         return(0);
+ }
+ /*
+  * If we're in the SYNOPSIS section and we've encountered square
+  * brackets in an ordinary paragraph, then try to see whether we're
+  * an [-option].
+  * Do this, if we're an opening bracket, by first seeing if we have a
+  * matching end via hasmatch().
+  * If we're an ending bracket, see if we have a stack already.
+  *
+  * buf[*start] must be '[' or ']' (asserted).
+  * *opstack counts the ".Oo" blocks currently open.
+  * Returns 1 when the bracket was turned into ".Oo" or ".Oc"; in that
+  * case *start has been moved past the bracket and any blanks after
+  * it, and last is reset to '\n'.
+  * Returns 0, touching nothing, when the bracket has to be treated
+  * as ordinary text (a '[' without partner, or a ']' with no open
+  * ".Oo").
+  */
+ static int
+ dosynopsisop(struct state *st, const char *buf,
+         size_t *start, size_t end, size_t *opstack)
+ {
+         assert('[' == buf[*start] || ']' == buf[*start]);
+         if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) {
+                 mdoc_newln(st);
+                 puts(".Oo");
+                 (*opstack)++;
+         } else if ('[' == buf[*start])
+                 return(0);
+         if (']' == buf[*start] && *opstack > 0) {
+                 mdoc_newln(st);
+                 puts(".Oc");
+                 (*opstack)--;
+         } else if (']' == buf[*start])
+                 return(0);
+         (*start)++;
+         last = '\n';
+         /* Stay inside the paragraph while skipping blanks. */
+         while (*start < end && ' ' == buf[*start])
+                 (*start)++;
+         return(1);
+ }
+ /*
+  * Format multiple "Nm" manpage names in the NAME section.
+  * From the perspective of external callers,
+  * always stays in OUST_NL/wantws mode,
+  * but its children do use OUST_MAC.
+  *
+  * The names are the comma-separated pieces of buf[*start..end);
+  * *start is advanced as they are consumed.
+  * Each piece is written on its own ".Nm" line by formatcodeln();
+  * every piece that is followed by a comma gets " ," appended to
+  * its line before the line is closed with mdoc_newln().
+  * If nothing but whitespace is found, ".Nm unknown" is printed.
+  */
+ static void
+ donamenm(struct state *st, const char *buf, size_t *start, size_t end)
+ {
+         size_t   word;
+         assert(OUST_NL == st->oust);
+         assert(st->wantws);
+         while (*start < end && isspace((unsigned char)buf[*start]))
+                 (*start)++;
+         if (end == *start) {
+                 puts(".Nm unknown");
+                 return;
+         }
+         while (*start < end) {
+                 for (word = *start; word < end; word++)  /* find the end of this name */
+                         if (',' == buf[word])
+                                 break;
+                 formatcodeln(st, "Nm", buf, start, word, 1);
+                 if (*start == end) {  /* that was the last name */
+                         mdoc_newln(st);
+                         break;
+                 }
+                 assert(',' == buf[*start]);
+                 printf(" ,");
+                 mdoc_newln(st);
+                 (*start)++;  /* step over the comma */
+                 while (*start < end && isspace((unsigned char)buf[*start]))
+                         (*start)++;
+         }
+ }
+ /*
   * Ordinary paragraph.
   * Well, this is really the hardest--POD seems to assume that, for
   * example, a leading space implies a newline, and so on.
   * Lots of other snakes in the grass: escaping a newline followed by a
   * period (accidental mdoc(7) control), double-newlines after macro
   * passages, etc.
+  *
+  * Uses formatcode() to go to OUST_MAC mode
+  * and outbuf_flush() to go to OUST_TXT mode.
+  * In text mode, wantws requests white space before the text
+  * currently contained in the outbuf, not before upcoming text.
+  * Must make sure to go back to OUST_NL/wantws mode before returning.
   */
  static void
  ordinary(struct state *st, const char *buf, size_t start, size_t end)
  {
-         int             last;
+         size_t          i, j, opstack, wend;
-         size_t          i, j;
+         enum mdoc_type  mtype;
+         int             eos, noeos, seq;
+         char            savechar;
          if ( ! st->parsing || st->paused)
                  return;
-Line 491  ordinary(struct state *st, const char *buf, size_t sta
+Line 1424  ordinary(struct state *st, const char *buf, size_t sta
 Line 491  ordinary(struct state *st, const char *buf, size_t sta
 Line 1424  ordinary(struct state *st, const char *buf, size_t sta
           * we're in "name - description" format.
           * To wit, print out a "Nm" and "Nd" in that format.
           */
-         if (st->isname) {
+         if (SECT_NAME == st->sect) {
-                 for (i = end - 1; i > start; i--)
+                 for (i = end - 2; i > start; i--)
-                         if ('-' == buf[i])
+                         if ('-' == buf[i] &&
+                             isspace((unsigned char)buf[i + 1]))
                                  break;
                  if ('-' == buf[i]) {
                          j = i;
-Line 501  ordinary(struct state *st, const char *buf, size_t sta
+Line 1435  ordinary(struct state *st, const char *buf, size_t sta
 Line 501  ordinary(struct state *st, const char *buf, size_t sta
 Line 1435  ordinary(struct state *st, const char *buf, size_t sta
                          for ( ; i > start; i--)
                                  if ('-' != buf[i])
                                          break;
-                         printf(".Nm %.*s\n",
+                         donamenm(st, buf, &start, i + 1);
-                                 (int)((i + 1) - start), &buf[start]);
+                         start = j + 1;
-                         printf(".Nd %.*s\n",
+                         while (start < end &&
-                                 (int)(end - (j + 1)), &buf[j + 1]);
+                              isspace((unsigned char)buf[start]))
+                                 start++;
+                         while (start < end && '.' == buf[end - 1])
+                                 end--;
+                         formatcodeln(st, "Nd", buf, &start, end, 1);
+                         mdoc_newln(st);
                          return;
                  }
          }
-Line 514  ordinary(struct state *st, const char *buf, size_t sta
+Line 1453  ordinary(struct state *st, const char *buf, size_t sta
 Line 514  ordinary(struct state *st, const char *buf, size_t sta
 Line 1453  ordinary(struct state *st, const char *buf, size_t sta
          st->haspar = 0;
          last = '\n';
+         opstack = 0;
-         while (start < end) {
+         for (seq = 0; start < end; seq++) {
                  /*
                   * Loop til we get either to a newline or escape.
                   * Escape initial control characters.
                   */
                  while (start < end) {
-                         if (start < end - 1 && '<' == buf[start + 1])
+                         if (start < end - 1 && '<' == buf[start + 1] &&
+                             'A' <= buf[start] && 'Z' >= buf[start])
                                  break;
                          else if ('\n' == buf[start])
                                  break;
                          else if ('\n' == last && '.' == buf[start])
-                                 printf("\\&");
+                                 outbuf_addstr(st, "\\&");
                          else if ('\n' == last && '\'' == buf[start])
-                                 printf("\\&");
+                                 outbuf_addstr(st, "\\&");
-                         putchar(last = buf[start++]);
+                         /*
-                 }
+                          * If we're in the SYNOPSIS, have square
+                          * brackets indicate that we're opening and
+                          * closing an optional context.
+                          */
-                 if (start < end - 1 && '<' == buf[start + 1]) {
+                         if (SECT_SYNOPSIS == st->sect &&
+                                 ('[' == buf[start] ||
+                                  ']' == buf[start]) &&
+                                 dosynopsisop(st, buf,
+                                     &start, end, &opstack))
+                                 continue;
+                         /* Merely buffer non-whitespace. */
+                         last = buf[start++];
+                         if ( ! isspace(last))
+                                 outbuf_addchar(st);
+                         if (start < end &&
+                             ! isspace((unsigned char)buf[start - 1]) &&
+                             ! isspace((unsigned char)buf[start]))
+                                 continue;
                          /*
-                          * We've encountered a format code.
+                          * Found the end of a word.
-                          * This is going to trigger a macro no matter
+                          * Rewind trailing delimiters.
-                          * what, so print a newline now.
-                          * Then print the (possibly nested) macros and
-                          * following that, a newline.
                           */
-                         if (formatcode(buf, &start, end, 0, last, 0))
-                                 putchar(last = '\n');
+                         eos = noeos = 0;
-                 } else if (start < end && '\n' == buf[start]) {
+                         for (wend = st->outbuflen; wend; wend--)
+                                 if ('.' == st->outbuf[wend - 1] ||
+                                     '!' == st->outbuf[wend - 1] ||
+                                     '?' == st->outbuf[wend - 1])
+                                         eos = 1;
+                                 else if ('|' == st->outbuf[wend - 1] ||
+                                     ',' == st->outbuf[wend - 1] ||
+                                     ';' == st->outbuf[wend - 1] ||
+                                     ':' == st->outbuf[wend - 1])
+                                         noeos = 1;
+                                 else if ('\'' != st->outbuf[wend - 1] &&
+                                     '"' != st->outbuf[wend - 1] &&
+                                     ')' != st->outbuf[wend - 1] &&
+                                     ']' != st->outbuf[wend - 1])
+                                         break;
+                         eos &= ! noeos;
                          /*
-                          * Print the newline only if we haven't already
+                          * Detect function names.
-                          * printed a newline.
                           */
-                         if (last != '\n')
-                                 putchar(last = buf[start]);
+                         mtype = MDOC_Fa;
+                         savechar = '\0';
+                         if (wend && ')' == st->outbuf[wend] &&
+                             '(' == st->outbuf[wend - 1]) {
+                                 mtype = dict_get(st->outbuf, --wend);
+                                 if (MDOC_Dv == mtype)
+                                         mtype = MDOC_Fo;
+                                 if (MDOC_Fo == mtype || MDOC_MAX == mtype) {
+                                         st->outbuflen = wend;
+                                         st->outbuf[wend] = '\0';
+                                         mdoc_newln(st);
+                                         if (MDOC_Fo == mtype)
+                                                 fputs(".Fn", stdout);
+                                         else
+                                                 fputs(".Xr", stdout);
+                                         st->oust = OUST_MAC;
+                                 }
+                         } else {
+                                 mtype = dict_get(st->outbuf, wend);
+                                 if (MDOC_Dv == mtype) {
+                                         savechar = st->outbuf[wend];
+                                         st->outbuf[wend] = '\0';
+                                         mdoc_newln(st);
+                                         fputs(".Dv", stdout);
+                                         st->oust = OUST_MAC;
+                                 } else
+                                         mtype = MDOC_Fa;
+                         }
+                         /*
+                          * On whitespace, flush the output buffer
+                          * and allow breaking to a macro line.
+                          */
+                         outbuf_flush(st);
+                         /*
+                          * End macro lines, and
+                          * end text lines at the end of sentences.
+                          */
+                         if (OUST_MAC == st->oust || (eos && wend > 1 &&
+                             islower((unsigned char)st->outbuf[wend - 1]))) {
+                                 if (MDOC_MAX == mtype)
+                                         fputs(" 3", stdout);
+                                 if (MDOC_Fa != mtype) {
+                                         if (MDOC_Dv == mtype)
+                                                 st->outbuf[wend] = savechar;
+                                         else
+                                                 wend += 2;
+                                         while ('\0' != st->outbuf[wend])
+                                                 printf(" %c",
+                                                     st->outbuf[wend++]);
+                                 }
+                                 mdoc_newln(st);
+                         }
+                         /* Advance to the next word. */
+                         while ('\n' != buf[start] &&
+                                isspace((unsigned char)buf[start]))
+                                 start++;
+                         st->wantws = 1;
+                 }
+                 if (start < end - 1 && '<' == buf[start + 1] &&
+                     'A' <= buf[start] && 'Z' >= buf[start]) {
+                         formatcode(st, buf, &start, end, 0, seq);
+                         if (OUST_MAC == st->oust) {
+                                 /*
+                                  * Let mdoc(7) handle trailing punctuation.
+                                  * XXX Some punctuation characters
+                                  *     are not handled yet.
+                                  */
+                                 if ((start == end - 1 ||
+                                      (start < end - 1 &&
+                                       (' ' == buf[start + 1] ||
+                                        '\n' == buf[start + 1]))) &&
+                                     NULL != strchr("|.,;:?!)]", buf[start])) {
+                                         putchar(' ');
+                                         putchar(buf[start++]);
+                                 }
+                                 if (st->wantws ||
+                                     ' ' == buf[start] ||
+                                     '\n' == buf[start])
+                                         mdoc_newln(st);
+                                 /*
+                                  * Consume all whitespace
+                                  * so we don't accidentally start
+                                  * an implicit literal line.
+                                  */
+                                 while (start < end && ' ' == buf[start])
+                                         start++;
+                                 /*
+                                  * Some text is following.
+                                  * Implement requested spacing.
+                                  */
+                                 if ( ! st->wantws && start < end &&
+                                     ('<' != buf[start + 1] ||
+                                      'A' > buf[start] ||
+                                      'Z' < buf[start])) {
+                                         fputs(" Ns", stdout);
+                                         st->wantws = 1;
+                                 }
+                         }
+                 } else if (start < end && '\n' == buf[start]) {
+                         outbuf_flush(st);
+                         mdoc_newln(st);
                          if (++start >= end)
                                  continue;
                          /*
-Line 559  ordinary(struct state *st, const char *buf, size_t sta
+Line 1643  ordinary(struct state *st, const char *buf, size_t sta
 Line 559  ordinary(struct state *st, const char *buf, size_t sta
 Line 1643  ordinary(struct state *st, const char *buf, size_t sta
                          * have a macro following it, which may be
                           * possible if we have an escape next.
                           */
-                         if (' ' == buf[start] || '\t' == buf[start]) {
+                         if (' ' == buf[start] || '\t' == buf[start])
                                  puts(".br");
-                                 last = '\n';
-                         }
                          for ( ; start < end; start++)
                                  if (' ' != buf[start] && '\t' != buf[start])
                                          break;
-                 } else if (start < end) {
+                 }
-                         /*
-                          * Default: print the character.
-                          * Escape initial control characters.
-                          */
-                         if ('\n' == last && '.' == buf[start])
-                                 printf("\\&");
-                         else if ('\n' == last && '\'' == buf[start])
-                                 printf("\\&");
-                         putchar(last = buf[start++]);
-                 }
          }
+         outbuf_flush(st);
-         if (last != '\n')
+         mdoc_newln(st);
-                 putchar('\n');
  }
  /*
-Line 589  ordinary(struct state *st, const char *buf, size_t sta
+Line 1660  ordinary(struct state *st, const char *buf, size_t sta
 Line 589  ordinary(struct state *st, const char *buf, size_t sta
 Line 1660  ordinary(struct state *st, const char *buf, size_t sta
   * (default: starts with "=").
   */
  static void
- dopar(struct state *st, const char *buf, size_t start, size_t end)
+ dopar(struct state *st, char *buf, size_t start, size_t end)
  {
+         assert(OUST_NL == st->oust);
+         assert(st->wantws);
          if (end == start)
                  return;
          if (' ' == buf[start] || '\t' == buf[start])
-Line 608  dopar(struct state *st, const char *buf, size_t start,
+Line 1682  dopar(struct state *st, const char *buf, size_t start,
 Line 608  dopar(struct state *st, const char *buf, size_t start,
 Line 1682  dopar(struct state *st, const char *buf, size_t start,
   */
  static void
  dofile(const struct args *args, const char *fname,
-         const struct tm *tm, const char *buf, size_t sz)
+         const struct tm *tm, char *buf, size_t sz)
  {
-         size_t           sup, end, i, cur = 0;
-         struct state     st;
-         const char      *section, *date;
          char             datebuf[64];
+         struct state     st;
+         const char      *fbase, *fext, *section, *date, *format;
          char            *title, *cp;
+         size_t           cur, end;
+         int              verb;
          if (0 == sz)
                  return;
-         /* Title is last path component of the filename. */
+         /*
+          * Parsing the filename is almost always required,
+          * except when both the title and the section
+          * are provided on the command line.
+          */
-         if (NULL != args->title)
+         if (NULL == args->title || NULL == args->section) {
-                 title = strdup(args->title);
+                 fbase = strrchr(fname, '/');
-         else if (NULL != (cp = strrchr(fname, '/')))
+                 if (NULL == fbase)
-                 title = strdup(cp + 1);
+                         fbase = fname;
-         else
+                 else
-                 title = strdup(fname);
+                         fbase++;
+                 fext = strrchr(fbase, '.');
+         } else
+                 fext = NULL;
+         /*
+          * The title will be converted to uppercase,
+          * so it needs to be copied.
+          */
+         title = (NULL != args->title) ? strdup(args->title) :
+                 (NULL != fext) ? strndup(fbase, fext - fbase) :
+                 strdup(fbase);
          if (NULL == title) {
                  perror(NULL);
                  exit(EXIT_FAILURE);
-Line 635  dofile(const struct args *args, const char *fname,
+Line 1726  dofile(const struct args *args, const char *fname,
 Line 635  dofile(const struct args *args, const char *fname,
 Line 1726  dofile(const struct args *args, const char *fname,
          /* Section is 1 unless suffix is "pm". */
-         if (NULL == (section = args->section)) {
+         section = (NULL != args->section) ? args->section :
-                 section = "1";
+             (NULL == fext || strcmp(fext + 1, "pm")) ? "1" :
-                 if (NULL != (cp = strrchr(title, '.'))) {
+             PERL_SECTION;
-                         *cp++ = '\0';
-                         if (0 == strcmp(cp, "pm"))
-                                 section = "3p";
-                 }
-         }
          /* Date: as supplied, else formatted from the given "tm". */
-         if (NULL == (date = args->date)) {
+         date = args->date;
-                 strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm);
+         format = (NULL == date) ? "%B %d, %Y" :
+             strcmp(date, "Mdocdate") ? NULL : "$" "Mdocdate: %B %d %Y $";
+         if (NULL != format) {
+                 strftime(datebuf, sizeof(datebuf), format, tm);
                  date = datebuf;
          }
-Line 662  dofile(const struct args *args, const char *fname,
+Line 1752  dofile(const struct args *args, const char *fname,
 Line 662  dofile(const struct args *args, const char *fname,
 Line 1752  dofile(const struct args *args, const char *fname,
          free(title);
+         dict_init();
          memset(&st, 0, sizeof(struct state));
+         st.oust = OUST_NL;
+         st.wantws = 1;
          assert(sz > 0);
          /* Main loop over file contents. */
-         while (cur < sz) {
+         cur = 0;
+         for (;;) {
+                 while (cur < sz && '\n' == buf[cur])
+                         cur++;
+                 if (cur >= sz)
+                         break;
+                 verb = isspace((unsigned char)buf[cur]);
                  /* Read until next paragraph. */
-                 for (i = cur + 1; i < sz; i++)
-                         if ('\n' == buf[i] && '\n' == buf[i - 1]) {
+                 for (end = cur + 1; end + 1 < sz; end++)
-                                 /* Consume blank paragraphs. */
+                         if ('\n' == buf[end] && '\n' == buf[end + 1] &&
-                                 while (i + 1 < sz && '\n' == buf[i + 1])
+                             !(verb && end + 2 < sz &&
-                                         i++;
+                               isspace((unsigned char)buf[end + 2])))
                                  break;
-                         }
                  /* Adjust end marker for EOF. */
-                 end = i < sz ? i - 1 :
-                         ('\n' == buf[sz - 1] ? sz - 1 : sz);
-                 sup = i < sz ? end + 2 : sz;
+                 if (end < sz && '\n' != buf[end])
+                         end++;
                  /* Process paragraph and adjust start. */
                  dopar(&st, buf, cur, end);
-                 cur = sup;
+                 cur = end + 2;
          }
+         dict_destroy();
  }
  /*
-Line 705  readfile(const struct args *args, const char *fname)
+Line 1808  readfile(const struct args *args, const char *fname)
 Line 705  readfile(const struct args *args, const char *fname)
 Line 1808  readfile(const struct args *args, const char *fname)
          time_t           ttm;
          struct stat      st;
-         assert(NULL != fname);
          fd = 0 != strcmp("-", fname) ?
                  open(fname, O_RDONLY, 0) : STDIN_FILENO;
-Line 812  main(int argc, char *argv[])
+Line 1913  main(int argc, char *argv[])
 Line 812  main(int argc, char *argv[])
 Line 1913  main(int argc, char *argv[])
          /* Accept only a single input file. */
-         if (argc > 2)
+         if (argc > 1)
-                 return(EXIT_FAILURE);
+                 goto usage;
          else if (1 == argc)
                  fname = *argv;
-Line 822  main(int argc, char *argv[])
+Line 1923  main(int argc, char *argv[])
 Line 822  main(int argc, char *argv[])
 Line 1923  main(int argc, char *argv[])
  usage:
          fprintf(stderr, "usage: %s [-d date] "
-                 "[-n title] [-s section]\n", name);
+             "[-n title] [-s section] [file]\n", name);
          return(EXIT_FAILURE);
  }

CVSweb