pod2mdoc/pod2mdoc.c - diff

Return to pod2mdoc.c CVS log

Up to [cvsweb.bsd.lv] / pod2mdoc

Diff for /pod2mdoc/pod2mdoc.c between version 1.7 and 1.31

-version 1.7, 2014/03/23 23:35:59
+version 1.31, 2014/07/15 19:03:07
 Line 26
 Line 26
 Line 26
  #include <string.h>
  #include <unistd.h>
+ /*
+  * In what section can we find Perl module manuals?
+  * Sometimes (Mac OS X) it's 3pm, sometimes (OpenBSD, etc.) 3p.
+  * XXX IF YOU CHANGE THIS, CHANGE POD2MDOC.1 AS WELL.
+  */
+ #define PERL_SECTION    "3p"
  struct  args {
          const char      *title; /* override "Dt" title */
          const char      *date; /* override "Dd" date */
-Line 39  enum list {
+Line 46  enum list {
 Line 39  enum list {
 Line 46  enum list {
          LIST__MAX
  };
+ enum    sect { /* which =head1 section is being processed */
+         SECT_NONE = 0, /* any section other than NAME or SYNOPSIS */
+         SECT_NAME, /* NAME section */
+         SECT_SYNOPSIS, /* SYNOPSIS section */
+ };
  struct  state {
+         const char      *fname; /* file being parsed */
          int              parsing; /* after =cut of before command */
          int              paused; /* in =begin and before =end */
-         int              haspar; /* in paragraph: do we need Pp? */
+         enum sect        sect; /* which section are we in? */
-         int              isname; /* are we the NAME section? */
-         const char      *fname; /* file being parsed */
  #define LIST_STACKSZ     128
          enum list        lstack[LIST_STACKSZ]; /* open lists */
          size_t           lpos; /* where in list stack */
+         int              haspar; /* in paragraph: do we need Pp? */
+         int              hasnl; /* in text: just started a new line */
+         char            *outbuf; /* text buffered for output */
+         size_t           outbufsz; /* allocated size of outbuf */
+         size_t           outbuflen; /* current length of outbuf */
  };
  enum    fmt {
-Line 110  static const char fmts[FMT__MAX] = {
+Line 127  static const char fmts[FMT__MAX] = {
 Line 110  static const char fmts[FMT__MAX] = {
 Line 127  static const char fmts[FMT__MAX] = {
  static  int     last;
+ /*
+  * Enlarge the output buffer by as many 128-byte chunks as are needed
+  * to hold at least "by" more bytes.
+  * If the allocation fails, report the error and exit.
+  */
+ static void
+ outbuf_grow(struct state *st, size_t by)
+ {
+         size_t   chunks;
+         chunks = by / 128 + 1;
+         st->outbufsz += chunks * 128;
+         st->outbuf = realloc(st->outbuf, st->outbufsz);
+         if (NULL != st->outbuf)
+                 return;
+         perror(NULL);
+         exit(EXIT_FAILURE);
+ }
+ /*
+  * Append the character held in "last" to the output buffer.
+  * A backslash is followed by an 'e' so that it is emitted as "\e".
+  * The buffer is kept NUL-terminated.
+  */
+ static void
+ outbuf_addchar(struct state *st)
+ {
+         char    *cp;
+         /* Need room for up to two characters plus the terminator. */
+         if (st->outbufsz <= st->outbuflen + 2)
+                 outbuf_grow(st, 1);
+         cp = st->outbuf + st->outbuflen;
+         *cp++ = last;
+         if ('\\' == last)
+                 *cp++ = 'e';
+         *cp = '\0';
+         st->outbuflen = cp - st->outbuf;
+ }
+ /*
+  * Append the NUL-terminated string "str" to the output buffer and
+  * remember its final character in "last".
+  * An empty string is a no-op.
+  */
+ static void
+ outbuf_addstr(struct state *st, const char *str)
+ {
+         size_t   slen;
+         slen = strlen(str);
+         /* Nothing to append; str[slen - 1] would be out of bounds. */
+         if (0 == slen)
+                 return;
+         if (st->outbuflen + slen >= st->outbufsz)
+                 outbuf_grow(st, slen);
+         /* Copy the terminator too, so that outbuf stays a string. */
+         memcpy(st->outbuf + st->outbuflen, str, slen+1);
+         /*
+          * Account for the new text: otherwise the next append would
+          * overwrite it and outbuf_flush() would not know about it.
+          */
+         st->outbuflen += slen;
+         last = str[slen - 1];
+ }
+ /*
+  * Write any buffered text to standard output and empty the buffer.
+  * A flush that wrote something also clears the "hasnl" flag.
+  */
+ static void
+ outbuf_flush(struct state *st)
+ {
+         if (st->outbuflen > 0) {
+                 fputs(st->outbuf, stdout);
+                 st->outbuf[0] = '\0';
+                 st->outbuflen = 0;
+                 st->hasnl = 0;
+         }
+ }
+ /*
+  * Unless the last character emitted was already a newline, flush the
+  * buffered text, end the current output line and set "hasnl".
+  */
+ static void
+ outbuf_newln(struct state *st)
+ {
+         if ('\n' != last) {
+                 outbuf_flush(st);
+                 putchar('\n');
+                 last = '\n';
+                 st->hasnl = 1;
+         }
+ }
  /*
   * Given buf[*start] is at the start of an escape name, read til the end
   * of the escape ('>') then try to do something with it.
   * Sets start to be one after the '>'.
   */
  static void
- formatescape(const char *buf, size_t *start, size_t end)
+ formatescape(struct state *st, const char *buf, size_t *start, size_t end)
  {
          char             esc[16]; /* no more needed */
          size_t           i, max;
-Line 144  formatescape(const char *buf, size_t *start, size_t en
+Line 223  formatescape(const char *buf, size_t *start, size_t en
 Line 144  formatescape(const char *buf, size_t *start, size_t en
 Line 223  formatescape(const char *buf, size_t *start, size_t en
           * Just let the rest of them go.
           */
          if (0 == strcmp(esc, "lt"))
-                 printf("\\(la");
+                 outbuf_addstr(st, "\\(la");
          else if (0 == strcmp(esc, "gt"))
-                 printf("\\(ra");
+                 outbuf_addstr(st, "\\(ra");
          else if (0 == strcmp(esc, "vb"))
-                 printf("\\(ba");
+                 outbuf_addstr(st, "\\(ba");
          else if (0 == strcmp(esc, "sol"))
-                 printf("\\(sl");
+                 outbuf_addstr(st, "\\(sl");
+ }
+ /*
+  * Run some heuristics to intuit a link format.
+  * "buf[*start]" is the first byte of the link content, "end" bounds the
+  * scan, and "dsz" is the number of '>' characters in the closing
+  * delimiter (when greater than one, the delimiter must also be
+  * preceded by a space).
+  * Once a closing delimiter with content before it has been found, I
+  * set "start" to the '>' that begins it so that the caller can safely
+  * just continue processing.
+  * On success the macro name and its arguments are printed (without a
+  * leading dot or trailing newline) and 1 is returned.
+  * If this is just an empty tag, I'll return 0.
+  */
+ static int
+ trylink(const char *buf, size_t *start, size_t end, size_t dsz)
+ {
+         size_t           linkstart, realend, linkend,
+                          i, j, textsz, stack;
+         /*
+          * Scan to the start of the terminus.
+          * This function is more or less replicated in the formatcode()
+          * for null or index formatting codes.
+          * However, we're slightly different because we might have
+          * nested escapes we need to ignore.
+          */
+         stack = 0;
+         for (linkstart = realend = *start; realend < end; realend++) {
+                 if ('<' == buf[realend])
+                         stack++;
+                 if ('>' != buf[realend])
+                         continue;
+                 /*
+                  * Closes a nested code: pop it and keep scanning.
+                  * Only decrement when non-zero, because "stack" is
+                  * unsigned and must not wrap around.
+                  */
+                 if (stack > 0) {
+                         stack--;
+                         continue;
+                 }
+                 if (dsz == 1)
+                         break;
+                 assert(realend > 0);
+                 if (' ' != buf[realend - 1])
+                         continue;
+                 for (i = realend, j = 0; i < end && j < dsz; j++)
+                         if ('>' != buf[i++])
+                                 break;
+                 if (dsz == j)
+                         break;
+         }
+         /* Ignore stubs. */
+         if (realend == end || realend == *start)
+                 return(0);
+         /* Set linkend to the end of content. */
+         linkend = dsz > 1 ? realend - 1 : realend;
+         /* Re-scan to see if we have a title or section. */
+         for (textsz = *start; textsz < linkend; textsz++)
+                 if ('|' == buf[textsz] || '/' == buf[textsz])
+                         break;
+         if (textsz < linkend && '|' == buf[textsz]) {
+                 /* With title: set start, then end at section. */
+                 linkstart = textsz + 1;
+                 textsz = textsz - *start;
+                 for (i = linkstart; i < linkend; i++)
+                         if ('/' == buf[i])
+                                 break;
+                 if (i < linkend)
+                         linkend = i;
+         } else if (textsz < linkend && '/' == buf[textsz]) {
+                 /* With section: set end at section. */
+                 linkend = textsz;
+                 textsz = 0;
+         } else
+                 /* No title, no section. */
+                 textsz = 0;
+         *start = realend;
+         j = linkend - linkstart;
+         /* Do we have only subsection material? */
+         if (0 == j && '/' == buf[linkend]) {
+                 linkstart = linkend + 1;
+                 linkend = dsz > 1 ? realend - 1 : realend;
+                 if (0 == (j = linkend - linkstart))
+                         return(0);
+                 printf("Sx %.*s", (int)j, &buf[linkstart]);
+                 return(1);
+         } else if (0 == j)
+                 return(0);
+         /*
+          * See if we qualify as being a link or not.
+          * Compare only the scheme prefix: comparing "j" bytes would
+          * read beyond the end of the shorter string literal.
+          */
+         if ((j > 4 && 0 == memcmp("http:", &buf[linkstart], 5)) ||
+                 (j > 5 && 0 == memcmp("https:", &buf[linkstart], 6)) ||
+                 (j > 3 && 0 == memcmp("ftp:", &buf[linkstart], 4)) ||
+                 (j > 4 && 0 == memcmp("sftp:", &buf[linkstart], 5)) ||
+                 (j > 3 && 0 == memcmp("smb:", &buf[linkstart], 4)) ||
+                 (j > 3 && 0 == memcmp("afs:", &buf[linkstart], 4))) {
+                 /* Gross. */
+                 printf("Lk %.*s", (int)((dsz > 1 ? realend - 1 :
+                         realend) - linkstart), &buf[linkstart]);
+                 return(1);
+         }
+         /* See if we qualify as a mailto (again, prefix only). */
+         if (j > 6 && 0 == memcmp("mailto:", &buf[linkstart], 7)) {
+                 printf("Mt %.*s", (int)j, &buf[linkstart]);
+                 return(1);
+         }
+         /* See if we're a foo(5), foo(5x), or foo(5xx) manpage. */
+         if ((j > 3 && ')' == buf[linkend - 1]) &&
+                 ('(' == buf[linkend - 3])) {
+                 printf("Xr %.*s %c", (int)(j - 3),
+                         &buf[linkstart], buf[linkend - 2]);
+                 return(1);
+         } else if ((j > 4 && ')' == buf[linkend - 1]) &&
+                 ('(' == buf[linkend - 4])) {
+                 printf("Xr %.*s %.*s", (int)(j - 4),
+                         &buf[linkstart], 2, &buf[linkend - 3]);
+                 return(1);
+         } else if ((j > 5 && ')' == buf[linkend - 1]) &&
+                 ('(' == buf[linkend - 5])) {
+                 printf("Xr %.*s %.*s", (int)(j - 5),
+                         &buf[linkstart], 3, &buf[linkend - 4]);
+                 return(1);
+         }
+         /* Last try: do we have a double-colon? */
+         for (i = linkstart + 1; i < linkend; i++)
+                 if (':' == buf[i] && ':' == buf[i - 1])
+                         break;
+         if (i < linkend)
+                 printf("Xr %.*s " PERL_SECTION,
+                         (int)j, &buf[linkstart]);
          else
-                 return;
+                 printf("Xr %.*s 1", (int)j, &buf[linkstart]);
-         last = 'a';
+         return(1);
  }
  /*
-  * Skip space characters.
+  * Doclifting: if we're a bold "-xx" and we're in the SYNOPSIS section,
+  * then it's likely that we're a flag.
+  * Our flag might be followed by an argument, so make sure that we're
+  * accounting for that, too.
+  * If we don't have a flag at all, however, then assume we're an "Ar".
+  * On entry, buf[*start] must be the leading dash and at least one more
+  * byte must follow it before "end" (both are asserted).
+  * A flag is a run of letters, digits, question marks, dashes and
+  * underscores terminated by a space or a closing angle bracket.
+  * For each flag the function prints "Fl" and the flag text, preceded
+  * by a zero-width escape when the text looks like a two-letter macro
+  * name; if spaces and then another dash follow, it starts over there.
+  * In every other case it prints "Ar" and returns.
+  * "start" is updated to the position where the caller resumes.
   */
- static int
+ static void
- skipspace(const char *buf, size_t *start, size_t end)
+ dosynopsisfl(const char *buf, size_t *start, size_t end)
  {
-         size_t           sv = *start;
+         size_t   i;
+ again:
+         assert(*start + 1 < end);
+         assert('-' == buf[*start]);
-         while (*start < end && ' ' == buf[*start])
+         if ( ! isalnum((int)buf[*start + 1]) &&
-                 (*start)++;
+                 '?' != buf[*start + 1] &&
+                 '-' != buf[*start + 1]) {
+                 (*start)--;
+                 fputs("Ar ", stdout);
+                 return;
+         }
-         return(*start > sv);
+         (*start)++;
+         for (i = *start; i < end; i++)
+                 if (isalnum((int)buf[i]))
+                         continue;
+                 else if ('?' == buf[i])
+                         continue;
+                 else if ('-' == buf[i])
+                         continue;
+                 else if ('_' == buf[i])
+                         continue;
+                 else
+                         break;
+         assert(i < end);
+         if ( ! (' ' == buf[i] || '>' == buf[i])) {
+                 printf("Ar ");
+                 return;
+         }
+         printf("Fl ");
+         if (end - *start > 1 &&
+                 isupper((int)buf[*start]) &&
+                 islower((int)buf[*start + 1]) &&
+                 (end - *start == 2 ||
+                  ' ' == buf[*start + 2]))
+                 printf("\\&");
+         printf("%.*s ", (int)(i - *start), &buf[*start]);
+         *start = i;
+         if (' ' == buf[i]) {
+                 while (i < end && ' ' == buf[i])
+                         i++;
+                 assert(i < end);
+                 if ('-' == buf[i]) {
+                         *start = i;
+                         goto again;
+                 }
+                 printf("Ar ");
+                 *start = i;
+         }
  }
  /*
-Line 180  skipspace(const char *buf, size_t *start, size_t end)
+Line 440  skipspace(const char *buf, size_t *start, size_t end)
 Line 180  skipspace(const char *buf, size_t *start, size_t end)
 Line 440  skipspace(const char *buf, size_t *start, size_t end)
   * been printed to the current line.
   * If "nomacro", then we don't print any macros, just contained data
   * (e.g., following "Sh" or "Nm").
+  * "pos" is only significant in SYNOPSIS, and should be 0 when invoked
+  * as the first format code on a line (for decoration as an "Nm"),
+  * non-zero otherwise.
   * Return whether we've printed a macro or not--in other words, whether
   * this should trigger a subsequent newline (this should be ignored when
   * reentrant).
   */
  static int
- formatcode(const char *buf, size_t *start,
+ formatcode(struct state *st, const char *buf, size_t *start,
-         size_t end, int reentrant, int nomacro)
+         size_t end, int reentrant, int nomacro, int pos)
  {
          enum fmt         fmt;
          size_t           i, j, dsz;
+         int              white;
          assert(*start + 1 < end);
          assert('<' == buf[*start + 1]);
          /*
           * First, look up the format code.
-          * If it's not valid, then exit immediately.
+          * If it's not valid, treat it as a NOOP.
           */
          for (fmt = 0; fmt < FMT__MAX; fmt++)
                  if (buf[*start] == fmts[fmt])
                          break;
-         if (FMT__MAX == fmt) {
-                 putchar(last = buf[(*start)++]);
-                 return(0);
-         }
          /*
           * Determine whether we're overriding our delimiter.
           * According to POD, if we have more than one '<' followed by a
-Line 231  formatcode(const char *buf, size_t *start,
+Line 490  formatcode(const char *buf, size_t *start,
 Line 231  formatcode(const char *buf, size_t *start,
 Line 490  formatcode(const char *buf, size_t *start,
           * processing for real macros.
           */
          if (FMT_ESCAPE == fmt) {
-                 formatescape(buf, start, end);
+                 formatescape(st, buf, start, end);
                  return(0);
          } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
                  /*
-Line 255  formatcode(const char *buf, size_t *start,
+Line 514  formatcode(const char *buf, size_t *start,
 Line 255  formatcode(const char *buf, size_t *start,
 Line 514  formatcode(const char *buf, size_t *start,
                          (*start) += dsz;
                          break;
                  }
+                 if (*start < end) {
+                         assert('>' == buf[*start]);
+                         (*start)++;
+                 }
+                 if (isspace(last))
+                         while (*start < end && isspace((int)buf[*start]))
+                                 (*start)++;
                  return(0);
          }
-Line 262  formatcode(const char *buf, size_t *start,
+Line 528  formatcode(const char *buf, size_t *start,
 Line 262  formatcode(const char *buf, size_t *start,
 Line 528  formatcode(const char *buf, size_t *start,
           * Check whether we're supposed to print macro stuff (this is
           * suppressed in, e.g., "Nm" and "Sh" macros).
           */
-         if ( ! nomacro) {
+         if (FMT__MAX != fmt && !nomacro) {
+                 white = ' ' == last || '\n' == last ||
+                         ' ' == buf[*start];
                  /*
-                  * Print out the macro describing this format code.
+                  * If we are on a text line and there is no
-                  * If we're not "reentrant" (not yet on a macro line)
+                  * whitespace before our content, we have to make
-                  * then print a newline, if necessary, and the macro
+                  * the previous word a prefix to the macro line.
-                  * indicator.
-                  * Otherwise, offset us with a space.
                   */
-                 if ( ! reentrant) {
+                 if ( ! white && ! reentrant) {
+                         if ( ! st->hasnl)
+                                 putchar('\n');
+                         printf(".Pf ");
+                 }
+                 outbuf_flush(st);
+                 /* Whitespace is easier to suppress on macro lines. */
+                 if ( ! white && reentrant)
+                         printf(" Ns");
+                 /* Unless we are on a macro line, start one. */
+                 if (white && ! reentrant) {
                          if (last != '\n')
                                  putchar('\n');
                          putchar('.');
                  } else
                          putchar(' ');
-                 /*
-                  * If we don't have whitespace before us (and none after
-                  * the opening delimiter), then suppress macro
-                  * whitespace with Pf.
-                  */
-                 if (' ' != last && '\n' != last && ' ' != buf[*start])
-                         printf("Pf ");
+                 /* Print the macro corresponding to this format code. */
                  switch (fmt) {
                  case (FMT_ITALIC):
                          printf("Em ");
                          break;
                  case (FMT_BOLD):
-                         printf("Sy ");
+                         if (SECT_SYNOPSIS == st->sect) {
+                                 if (1 == dsz && '-' == buf[*start])
+                                         dosynopsisfl(buf, start, end);
+                                 else if (0 == pos)
+                                         printf("Nm ");
+                                 else
+                                         printf("Ar ");
+                                 break;
+                         }
+                         if (0 == strncmp(buf + *start, "NULL", 4) &&
+                             ('=' == buf[*start + 4] ||
+                              '>' == buf[*start + 4]))
+                                 printf("Dv ");
+                         else
+                                 printf("Sy ");
                          break;
                  case (FMT_CODE):
                          printf("Qo Li ");
                          break;
                  case (FMT_LINK):
-                         printf("Lk ");
+                         /* Try to link; use "No" if it's empty. */
+                         if ( ! trylink(buf, start, end, dsz))
+                                 printf("No ");
                          break;
                  case (FMT_FILE):
                          printf("Pa ");
                          break;
                  case (FMT_NBSP):
-                         /* TODO. */
                          printf("No ");
                          break;
                  default:
                          abort();
                  }
-         }
+         } else
+                 outbuf_flush(st);
          /*
           * Process until we reach the end marker (e.g., '>') or until we
-Line 338  formatcode(const char *buf, size_t *start,
+Line 631  formatcode(const char *buf, size_t *start,
 Line 338  formatcode(const char *buf, size_t *start,
 Line 631  formatcode(const char *buf, size_t *start,
                          }
                  }
                  if (*start + 1 < end && '<' == buf[*start + 1]) {
-                         formatcode(buf, start, end, 1, nomacro);
+                         formatcode(st, buf, start, end, 1, nomacro, 1);
                          continue;
                  }
-Line 363  formatcode(const char *buf, size_t *start,
+Line 656  formatcode(const char *buf, size_t *start,
 Line 363  formatcode(const char *buf, size_t *start,
 Line 656  formatcode(const char *buf, size_t *start,
                  else
                          putchar(last = buf[*start]);
+                 /* Protect against character escapes. */
+                 if ('\\' == last)
+                         putchar('e');
                  (*start)++;
                  if (' ' == last)
-Line 370  formatcode(const char *buf, size_t *start,
+Line 667  formatcode(const char *buf, size_t *start,
 Line 370  formatcode(const char *buf, size_t *start,
 Line 667  formatcode(const char *buf, size_t *start,
                                  (*start)++;
          }
+         if (FMT__MAX == fmt)
+                 return(0);
          if ( ! nomacro && FMT_CODE == fmt)
                  printf(" Qc ");
-Line 389  formatcode(const char *buf, size_t *start,
+Line 689  formatcode(const char *buf, size_t *start,
 Line 389  formatcode(const char *buf, size_t *start,
 Line 689  formatcode(const char *buf, size_t *start,
   * Calls formatcode() til the end of a paragraph.
   */
  static void
- formatcodeln(const char *buf, size_t *start, size_t end, int nomacro)
+ formatcodeln(struct state *st, const char *buf,
+         size_t *start, size_t end, int nomacro)
  {
          last = ' ';
          while (*start < end)  {
                  if (*start + 1 < end && '<' == buf[*start + 1]) {
-                         formatcode(buf, start, end, 1, nomacro);
+                         formatcode(st, buf, start, end, 1, nomacro, 1);
                          continue;
                  }
                  /*
-Line 412  formatcodeln(const char *buf, size_t *start, size_t en
+Line 713  formatcodeln(const char *buf, size_t *start, size_t en
 Line 412  formatcodeln(const char *buf, size_t *start, size_t en
 Line 713  formatcodeln(const char *buf, size_t *start, size_t en
                                   ' ' == buf[*start + 2]))
                          printf("\\&");
-                 if ('\n' != buf[*start])
+                 if ('\n' == buf[*start])
-                         putchar(last = buf[*start]);
-                 else
                          putchar(last = ' ');
+                 else
+                         putchar(last = buf[*start]);
+                 /* Protect against character escapes. */
+                 if ('\\' == last)
+                         putchar('e');
                  (*start)++;
          }
  }
-Line 471  command(struct state *st, const char *buf, size_t star
+Line 777  command(struct state *st, const char *buf, size_t star
 Line 471  command(struct state *st, const char *buf, size_t star
 Line 777  command(struct state *st, const char *buf, size_t star
                  return;
          start += csz;
-         skipspace(buf, &start, end);
+         while (start < end && ' ' == buf[start])
+                 start++;
          len = end - start;
          if (st->paused) {
-Line 488  command(struct state *st, const char *buf, size_t star
+Line 796  command(struct state *st, const char *buf, size_t star
 Line 488  command(struct state *st, const char *buf, size_t star
 Line 796  command(struct state *st, const char *buf, size_t star
                   * how pod2man handles it.
                   */
                  printf(".Sh ");
-                 st->isname = 0;
+                 st->sect = SECT_NONE;
-                 if (end - start == 4)
+                 if (end - start == 4) {
                          if (0 == memcmp(&buf[start], "NAME", 4))
-                                 st->isname = 1;
+                                 st->sect = SECT_NAME;
-                 formatcodeln(buf, &start, end, 1);
+                 } else if (end - start == 8) {
-                 putchar('\n');
+                         if (0 == memcmp(&buf[start], "SYNOPSIS", 8))
+                                 st->sect = SECT_SYNOPSIS;
+                 }
+                 formatcodeln(st, buf, &start, end, 1);
+                 putchar(last = '\n');
                  st->haspar = 1;
                  break;
          case (CMD_HEAD2):
                  printf(".Ss ");
-                 formatcodeln(buf, &start, end, 1);
+                 formatcodeln(st, buf, &start, end, 1);
-                 putchar('\n');
+                 putchar(last = '\n');
                  st->haspar = 1;
                  break;
          case (CMD_HEAD3):
                  puts(".Pp");
                  printf(".Em ");
-                 formatcodeln(buf, &start, end, 0);
+                 formatcodeln(st, buf, &start, end, 0);
-                 putchar('\n');
+                 putchar(last = '\n');
                  puts(".Pp");
                  st->haspar = 1;
                  break;
          case (CMD_HEAD4):
                  puts(".Pp");
                  printf(".No ");
-                 formatcodeln(buf, &start, end, 0);
+                 formatcodeln(st, buf, &start, end, 0);
-                 putchar('\n');
+                 putchar(last = '\n');
                  puts(".Pp");
                  st->haspar = 1;
                  break;
-Line 567  command(struct state *st, const char *buf, size_t star
+Line 879  command(struct state *st, const char *buf, size_t star
 Line 567  command(struct state *st, const char *buf, size_t star
 Line 879  command(struct state *st, const char *buf, size_t star
                  switch (st->lstack[st->lpos - 1]) {
                  case (LIST_TAG):
                          printf(".It ");
-                         formatcodeln(buf, &start, end, 0);
+                         formatcodeln(st, buf, &start, end, 0);
-                         putchar('\n');
+                         putchar(last = '\n');
                          break;
                  case (LIST_ENUM):
                          /* FALLTHROUGH */
-Line 624  command(struct state *st, const char *buf, size_t star
+Line 936  command(struct state *st, const char *buf, size_t star
 Line 624  command(struct state *st, const char *buf, size_t star
 Line 936  command(struct state *st, const char *buf, size_t star
  static void
  verbatim(struct state *st, const char *buf, size_t start, size_t end)
  {
-         size_t           sv = start;
+         size_t           i;
          if ( ! st->parsing || st->paused)
                  return;
+ again:
+         /*
+          * If we're in the SYNOPSIS, see if we're an #include block.
+          * If we are, then print the "In" macro and re-loop.
+          * This handles any number of inclusions, but only when they
+          * come before the remaining parts...
+          */
+         if (SECT_SYNOPSIS == st->sect) {
+                 i = start;
+                 for (i = start; i < end && ' ' == buf[i]; i++)
+                         /* Spin. */ ;
+                 if (i == end)
+                         return;
+                 /* We're an include block! */
+                 if (end - i > 10 &&
+== memcmp(&buf[i], "#include <", 10)) {
+                         start = i + 10;
+                         while (start < end && ' ' == buf[start])
+                                 start++;
+                         fputs(".In ", stdout);
+                         /* Stop til the '>' marker or we hit eoln. */
+                         while (start < end &&
+                                 '>' != buf[start] && '\n' != buf[start])
+                                 putchar(buf[start++]);
+                         putchar('\n');
+                         if (start < end && '>' == buf[start])
+                                 start++;
+                         if (start < end && '\n' == buf[start])
+                                 start++;
+                         if (start < end)
+                                 goto again;
+                         return;
+                 }
+         }
+         if (start == end)
+                 return;
          puts(".Bd -literal");
-         while (start < end) {
+         for (last = ' '; start < end; start++) {
-                 if (start > sv && '\n' == buf[start - 1])
+                 /*
+                  * Handle accidental macros (newline starting with
+                  * control character) and escapes.
+                  */
+                 if ('\n' == last)
                          if ('.' == buf[start] || '\'' == buf[start])
                                  printf("\\&");
-                 putchar(buf[start++]);
+                 putchar(last = buf[start]);
+                 if ('\\' == buf[start])
+                         printf("e");
          }
-         putchar('\n');
+         putchar(last = '\n');
          puts(".Ed");
  }
  /*
+  * See dosynopsisop().
+  */
+ static int
+ hasmatch(const char *buf, size_t start, size_t end)
+ {
+         size_t   stack;
+         for (stack = 0; start < end; start++)
+                 if (buf[start] == '[')
+                         stack++;
+                 else if (buf[start] == ']' && 0 == stack)
+                         return(1);
+                 else if (buf[start] == ']')
+                         stack--;
+         return(0);
+ }
+ /*
+  * If we're in the SYNOPSIS section and we've encountered brackets in an
+  * ordinary paragraph, then try to see whether we're an [-option].
+  * Do this, if we're an opening bracket, by first seeing if we have a
+  * matching end via hasmatch().
+  * If we're an ending bracket, see if we have a stack already.
+  * "opstack" counts the "Oo" blocks opened and not yet closed.
+  * Return 1 if an "Oo" or "Oc" line was printed, in which case "start"
+  * has been moved past the bracket and any spaces following it.
+  * Return 0, leaving "start" alone, if nothing was printed.
+  */
+ static int
+ dosynopsisop(const char *buf, size_t *start, size_t end, size_t *opstack)
+ {
+         assert('[' == buf[*start] || ']' == buf[*start]);
+         if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) {
+                 if ('\n' != last)
+                         putchar('\n');
+                 puts(".Oo");
+                 (*opstack)++;
+         } else if ('[' == buf[*start])
+                 return(0);
+         if (']' == buf[*start] && *opstack > 0) {
+                 if ('\n' != last)
+                         putchar('\n');
+                 puts(".Oc");
+                 (*opstack)--;
+         } else if (']' == buf[*start])
+                 return(0);
+         (*start)++;
+         /* puts() ended the macro line, so we are at a line start. */
+         last = '\n';
+         /* Stay inside the paragraph while skipping spaces. */
+         while (*start < end && ' ' == buf[*start])
+                 (*start)++;
+         return(1);
+ }
+ /*
+  * Emit one "Nm" line per comma-separated name found in the bytes from
+  * "*start" up to but not including "end" (NAME section).
+  * If only spaces are found, emit "Nm unknown" instead.
+  * Every name but the last is followed by a " ," argument.
+  */
+ static void
+ donamenm(struct state *st, const char *buf, size_t *start, size_t end)
+ {
+         size_t   comma;
+         /* Skip leading blanks. */
+         for ( ; *start < end && ' ' == buf[*start]; (*start)++)
+                 continue;
+         if (*start == end) {
+                 puts(".Nm unknown");
+                 return;
+         }
+         while (*start < end) {
+                 fputs(".Nm ", stdout);
+                 /* Find where this name stops: a comma or the end. */
+                 comma = *start;
+                 while (comma < end && ',' != buf[comma])
+                         comma++;
+                 formatcodeln(st, buf, start, comma, 1);
+                 if (end == *start) {
+                         /* That was the last name. */
+                         putchar(last = '\n');
+                         break;
+                 }
+                 assert(',' == buf[*start]);
+                 puts(" ,");
+                 /* Step over the comma and the blanks after it. */
+                 for ((*start)++; *start < end && ' ' == buf[*start]; )
+                         (*start)++;
+         }
+ }
+ /*
   * Ordinary paragraph.
   * Well, this is really the hardest--POD seems to assume that, for
   * example, a leading space implies a newline, and so on.
-Line 651  verbatim(struct state *st, const char *buf, size_t sta
+Line 1093  verbatim(struct state *st, const char *buf, size_t sta
 Line 651  verbatim(struct state *st, const char *buf, size_t sta
 Line 1093  verbatim(struct state *st, const char *buf, size_t sta
  static void
  ordinary(struct state *st, const char *buf, size_t start, size_t end)
  {
-         size_t          i, j;
+         size_t          i, j, opstack;
+         int             seq;
          if ( ! st->parsing || st->paused)
                  return;
-Line 662  ordinary(struct state *st, const char *buf, size_t sta
+Line 1105  ordinary(struct state *st, const char *buf, size_t sta
 Line 662  ordinary(struct state *st, const char *buf, size_t sta
 Line 1105  ordinary(struct state *st, const char *buf, size_t sta
           * we're in "name - description" format.
           * To wit, print out a "Nm" and "Nd" in that format.
           */
-         if (st->isname) {
+         if (SECT_NAME == st->sect) {
-                 for (i = end - 1; i > start; i--)
+                 for (i = end - 2; i > start; i--)
-                         if ('-' == buf[i])
+                         if ('-' == buf[i] && ' ' == buf[i + 1])
                                  break;
                  if ('-' == buf[i]) {
                          j = i;
-Line 672  ordinary(struct state *st, const char *buf, size_t sta
+Line 1115  ordinary(struct state *st, const char *buf, size_t sta
 Line 672  ordinary(struct state *st, const char *buf, size_t sta
 Line 1115  ordinary(struct state *st, const char *buf, size_t sta
                          for ( ; i > start; i--)
                                  if ('-' != buf[i])
                                          break;
-                         printf(".Nm ");
+                         donamenm(st, buf, &start, i + 1);
-                         formatcodeln(buf, &start, i + 1, 1);
-                         putchar('\n');
                          start = j + 1;
-                         printf(".Nd ");
+                         while (start < end && ' ' == buf[start])
-                         formatcodeln(buf, &start, end, 1);
+                                 start++;
-                         putchar('\n');
+                         fputs(".Nd ", stdout);
+                         formatcodeln(st, buf, &start, end, 1);
+                         putchar(last = '\n');
                          return;
                  }
          }
-Line 687  ordinary(struct state *st, const char *buf, size_t sta
+Line 1130  ordinary(struct state *st, const char *buf, size_t sta
 Line 687  ordinary(struct state *st, const char *buf, size_t sta
 Line 1130  ordinary(struct state *st, const char *buf, size_t sta
                  puts(".Pp");
          st->haspar = 0;
+         st->hasnl = 1;
          last = '\n';
+         opstack = 0;
-         while (start < end) {
+         for (seq = 0; start < end; seq++) {
                  /*
                   * Loop until we get to either a newline or an escape.
                   * Escape initial control characters.
-Line 700  ordinary(struct state *st, const char *buf, size_t sta
+Line 1145  ordinary(struct state *st, const char *buf, size_t sta
 Line 700  ordinary(struct state *st, const char *buf, size_t sta
 Line 1145  ordinary(struct state *st, const char *buf, size_t sta
                          else if ('\n' == buf[start])
                                  break;
                          else if ('\n' == last && '.' == buf[start])
-                                 printf("\\&");
+                                 outbuf_addstr(st, "\\&");
                          else if ('\n' == last && '\'' == buf[start])
-                                 printf("\\&");
+                                 outbuf_addstr(st, "\\&");
-                         putchar(last = buf[start++]);
+                         /*
+                          * If we're in the SYNOPSIS, have square
+                          * brackets indicate that we're opening and
+                          * closing an optional context.
+                          */
+                         if (SECT_SYNOPSIS == st->sect &&
+                                 ('[' == buf[start] ||
+                                  ']' == buf[start]) &&
+                                 dosynopsisop(buf, &start, end, &opstack))
+                                 continue;
+                         last = buf[start++];
+                         if (' ' == last) {
+                                 outbuf_flush(st);
+                                 putchar(' ');
+                         } else
+                                 outbuf_addchar(st);
                  }
                  if (start < end - 1 && '<' == buf[start + 1]) {
-                         /*
+                         if (formatcode(st, buf, &start, end, 0, 0, seq)) {
-                          * We've encountered a format code.
+                                 /*
-                          * This is going to trigger a macro no matter
+                                  * Let mdoc(7) handle trailing punctuation.
-                          * what, so print a newline now.
+                                  * XXX Some punctuation characters
-                          * Then print the (possibly nested) macros and
+                                  *     are not handled yet.
-                          * following that, a newline.
+                                  */
-                          */
+                                 if ((start == end - 1 ||
-                         if (formatcode(buf, &start, end, 0, 0)) {
+                                         (start < end - 1 &&
+                                          (' ' == buf[start + 1] ||
+                                           '\n' == buf[start + 1]))) &&
+                                         ('.' == buf[start] ||
+                                          ',' == buf[start])) {
+                                         putchar(' ');
+                                         putchar(buf[start++]);
+                                 }
+                                 /* End the macro line. */
                                  putchar(last = '\n');
+                                 st->hasnl = 1;
+                                 /*
+                                  * Consume all whitespace
+                                  * so we don't accidentally start
+                                  * an implicit literal line.
+                                  */
                                  while (start < end && ' ' == buf[start])
                                          start++;
                          }
                  } else if (start < end && '\n' == buf[start]) {
-                         /*
+                         outbuf_newln(st);
-                          * Print the newline only if we haven't already
-                          * printed a newline.
-                          */
-                         if (last != '\n')
-                                 putchar(last = buf[start]);
                          if (++start >= end)
                                  continue;
                          /*
-Line 736  ordinary(struct state *st, const char *buf, size_t sta
+Line 1205  ordinary(struct state *st, const char *buf, size_t sta
 Line 736  ordinary(struct state *st, const char *buf, size_t sta
 Line 1205  ordinary(struct state *st, const char *buf, size_t sta
                           * have a macro subsequent to it, which may be
                           * possible if we have an escape next.
                           */
-                         if (' ' == buf[start] || '\t' == buf[start]) {
+                         if (' ' == buf[start] || '\t' == buf[start])
                                  puts(".br");
-                                 last = '\n';
-                         }
                          for ( ; start < end; start++)
                                  if (' ' != buf[start] && '\t' != buf[start])
                                          break;
-                 } else if (start < end) {
+                 }
-                         /*
-                          * Default: print the character.
-                          * Escape initial control characters.
-                          */
-                         if ('\n' == last && '.' == buf[start])
-                                 printf("\\&");
-                         else if ('\n' == last && '\'' == buf[start])
-                                 printf("\\&");
-                         putchar(last = buf[start++]);
-                 }
          }
+         outbuf_newln(st);
-         if (last != '\n')
-                 putchar('\n');
  }
  /*
-Line 787  static void
+Line 1242  static void
 Line 787  static void
 Line 1242  static void
  dofile(const struct args *args, const char *fname,
          const struct tm *tm, const char *buf, size_t sz)
  {
-         size_t           sup, end, i, cur = 0;
-         struct state     st;
-         const char      *section, *date;
          char             datebuf[64];
+         struct state     st;
+         const char      *fbase, *fext, *section, *date;
          char            *title, *cp;
+         size_t           sup, end, i, cur = 0;
          if (0 == sz)
                  return;
-         /* Title is last path component of the filename. */
+         /*
+          * Parsing the filename is almost always required,
+          * except when both the title and the section
+          * are provided on the command line.
+          */
-         if (NULL != args->title)
+         if (NULL == args->title || NULL == args->section) {
-                 title = strdup(args->title);
+                 fbase = strrchr(fname, '/');
-         else if (NULL != (cp = strrchr(fname, '/')))
+                 if (NULL == fbase)
-                 title = strdup(cp + 1);
+                         fbase = fname;
-         else
+                 else
-                 title = strdup(fname);
+                         fbase++;
+                 fext = strrchr(fbase, '.');
+         } else
+                 fext = NULL;
+         /*
+          * The title will be converted to uppercase,
+          * so it needs to be copied.
+          */
+         title = (NULL != args->title) ? strdup(args->title) :
+                 (NULL != fext) ? strndup(fbase, fext - fbase) :
+                 strdup(fbase);
          if (NULL == title) {
                  perror(NULL);
                  exit(EXIT_FAILURE);
-Line 812  dofile(const struct args *args, const char *fname,
+Line 1283  dofile(const struct args *args, const char *fname,
 Line 812  dofile(const struct args *args, const char *fname,
 Line 1283  dofile(const struct args *args, const char *fname,
          /* Section is 1 unless suffix is "pm". */
-         if (NULL == (section = args->section)) {
+         section = (NULL != args->section) ? args->section :
-                 section = "1";
+             (NULL == fext || strcmp(fext + 1, "pm")) ? "1" :
-                 if (NULL != (cp = strrchr(title, '.'))) {
+             PERL_SECTION;
-                         *cp++ = '\0';
-                         if (0 == strcmp(cp, "pm"))
-                                 section = "3p";
-                 }
-         }
          /* Date.  Or the given "tm" if not supplied. */
-Line 882  readfile(const struct args *args, const char *fname)
+Line 1348  readfile(const struct args *args, const char *fname)
 Line 882  readfile(const struct args *args, const char *fname)
 Line 1348  readfile(const struct args *args, const char *fname)
          time_t           ttm;
          struct stat      st;
-         assert(NULL != fname);
          fd = 0 != strcmp("-", fname) ?
                  open(fname, O_RDONLY, 0) : STDIN_FILENO;
-Line 989  main(int argc, char *argv[])
+Line 1453  main(int argc, char *argv[])
 Line 989  main(int argc, char *argv[])
 Line 1453  main(int argc, char *argv[])
          /* Accept only a single input file. */
-         if (argc > 2)
+         if (argc > 1)
-                 return(EXIT_FAILURE);
+                 goto usage;
          else if (1 == argc)
                  fname = *argv;
-Line 999  main(int argc, char *argv[])
+Line 1463  main(int argc, char *argv[])
 Line 999  main(int argc, char *argv[])
 Line 1463  main(int argc, char *argv[])
  usage:
          fprintf(stderr, "usage: %s [-d date] "
-                 "[-n title] [-s section]\n", name);
+             "[-n title] [-s section] [file]\n", name);
          return(EXIT_FAILURE);
  }

CVSweb