pod2mdoc/pod2mdoc.c - diff

Return to pod2mdoc.c CVS log

Up to [cvsweb.bsd.lv] / pod2mdoc

Diff for /pod2mdoc/pod2mdoc.c between version 1.12 and 1.19

-version 1.12, 2014/04/01 11:58:32
+version 1.19, 2014/04/03 10:17:14
 Line 27
 Line 27
 Line 27
  #include <unistd.h>
  /*
-  * In what section can we find Perl manuals?
+  * In what section can we find Perl module manuals?
+  * Sometimes (Mac OS X) it's 3pm, sometimes (OpenBSD, etc.) 3p.
+  * XXX IF YOU CHANGE THIS, CHANGE POD2MDOC.1 AS WELL.
   */
  #define PERL_SECTION    "3p"
-Line 170  formatescape(const char *buf, size_t *start, size_t en
+Line 172  formatescape(const char *buf, size_t *start, size_t en
 Line 170  formatescape(const char *buf, size_t *start, size_t en
 Line 172  formatescape(const char *buf, size_t *start, size_t en
  /*
   * Run some heuristics to intuit a link format.
-  * I recognise L<foo::bar> as a Perl manpage, printing it in section 3p;
+  * I set "start" to be the end of the sequence (last right angle bracket) so
-  * or a general UNIX foo(5) manpage.
-  * If I recognise one, I set "start" to be the end of the sequence so
   * that the caller can safely just continue processing.
-  * Otherwise, I don't touch "start".
+  * If this is just an empty tag, I'll return 0.
   */
  static int
  trylink(const char *buf, size_t *start, size_t end, size_t dsz)
  {
-         size_t          sv, nstart, nend, i, j;
+         size_t           linkstart, realend, linkend, i, j, textsz;
-         int             hasdouble;
+         const char      *text;
          /*
           * Scan to the start of the terminus.
           * This function is more or less replicated in the formatcode()
           * for null or index formatting codes.
           */
-         hasdouble = 0;
+         for (linkstart = realend = *start; realend < end; realend++) {
-         for (sv = nstart = *start; nstart < end; nstart++) {
+                 if ('>' != buf[realend])
-                 /* Do we have a double-colon? */
-                 if (':' == buf[nstart] &&
-                         nstart > sv &&
-                         ':' == buf[nstart - 1])
-                         hasdouble = 1;
-                 if ('>' != buf[nstart])
                          continue;
                  else if (dsz == 1)
                          break;
-                 assert(nstart > 0);
+                 assert(realend > 0);
-                 if (' ' != buf[nstart - 1])
+                 if (' ' != buf[realend - 1])
                          continue;
-                 i = nstart;
+                 for (i = realend, j = 0; i < end && j < dsz; j++)
-                 for (j = 0; i < end && j < dsz; j++)
                          if ('>' != buf[i++])
                                  break;
                  if (dsz == j)
                          break;
          }
-         /* We don't care about stubs. */
+         /* Ignore stubs. */
-         if (nstart == end || nstart == *start)
+         if (realend == end || realend == *start)
                  return(0);
-         /* Set nend to the end of content. */
+         /* Set linkend to the end of content. */
-         nend = nstart;
+         linkend = dsz > 1 ? realend - 1 : realend;
-         if (dsz > 1)
-                 nend--;
-         /*
+         /* Re-scan to see if we have a title or section. */
-          * Provide for some common invocations of the link primitive.
+         text = &buf[*start]; /* NOTE(review): "text" is assigned but not read in the lines shown here */
-          * First, allow us to link to other Perl manuals.
+         for (textsz = *start; textsz < linkend; textsz++)
-          */
+                 if ('|' == buf[textsz] || '/' == buf[textsz])
-         if (hasdouble)
+                         break;
+         /* If we have a title, find the section. */
+         if (textsz < linkend && '|' == buf[textsz]) {
+                 linkstart = textsz + 1;
+                 textsz = textsz - *start;
+                 for (i = linkstart; i < linkend; i++)
+                         if ('/' == buf[i])
+                                 break;
+                 if (i < linkend)
+                         linkend = i;
+         } else {
+                 textsz = 0; /* NOTE(review): zeroed before the test below, so that test reads buf[0] rather than the separator found by the scan -- confirm intent */
+                 if (textsz < linkend && '/' == buf[textsz])
+                         linkend = textsz;
+         }
+         *start = realend;
+         j = linkend - linkstart;
+         if (0 == j)
+                 return(0);
+         /* See if we qualify as being a link or not. */
+         if ((j > 5 && 0 == memcmp("http:", &buf[linkstart], j)) || /* NOTE(review): here and below memcmp() compares j bytes, and j can exceed the size of the string literal -- confirm */
+                 (j > 6 && 0 == memcmp("https:", &buf[linkstart], j)) ||
+                 (j > 4 && 0 == memcmp("ftp:", &buf[linkstart], j)) ||
+                 (j > 5 && 0 == memcmp("sftp:", &buf[linkstart], j)) ||
+                 (j > 4 && 0 == memcmp("smb:", &buf[linkstart], j)) ||
+                 (j > 4 && 0 == memcmp("afs:", &buf[linkstart], j))) {
+                 printf("Lk %.*s", (int)j, &buf[linkstart]);
+                 return(1);
+         }
+         /* See if we qualify as a mailto. */
+         if (j > 7 && 0 == memcmp("mailto:", &buf[linkstart], j)) { /* NOTE(review): same j-byte comparison against a shorter literal as above */
+                 printf("Mt %.*s", (int)j, &buf[linkstart]);
+                 return(1);
+         }
+         /* See if we're a foo(5), foo(5x), or foo(5xx) manpage. */
+         if ((j > 3 && ')' == buf[linkend - 1]) &&
+                 ('(' == buf[linkend - 3])) {
+                 printf("Xr %.*s %c", (int)(j - 3),
+                         &buf[linkstart], buf[linkend - 2]);
+                 return(1);
+         } else if ((j > 4 && ')' == buf[linkend - 1]) &&
+                 ('(' == buf[linkend - 4])) {
+                 printf("Xr %.*s %.*s", (int)(j - 4),
+                         &buf[linkstart], 2, &buf[linkend - 3]);
+                 return(1);
+         } else if ((j > 5 && ')' == buf[linkend - 1]) &&
+                 ('(' == buf[linkend - 5])) {
+                 printf("Xr %.*s %.*s", (int)(j - 5),
+                         &buf[linkstart], 3, &buf[linkend - 4]);
+                 return(1);
+         }
+         /* Last try: do we have a double-colon? */
+         for (i = linkstart + 1; i < linkend; i++)
+                 if (':' == buf[i] && ':' == buf[i - 1])
+                         break;
+         if (i < linkend)
                  printf("Xr %.*s " PERL_SECTION,
-                         (int)(nend - sv), &buf[sv]);
+                         (int)j, &buf[linkstart]);
-         else if (nend - sv > 3 && isalnum(buf[sv]) &&
-                         ')' == buf[nend - 1] &&
-                         isdigit((int)buf[nend - 2]) &&
-                         '(' == buf[nend - 3])
-                 printf("Xr %.*s %c",
-                         (int)(nend - 3 - sv),
-                         &buf[sv], buf[nend - 2]);
          else
-                 return(0);
+                 printf("Xr %.*s 1", (int)j, &buf[linkstart]); /* nothing else matched: print it as a section 1 reference */
-         *start = nstart;
          return(1);
  }
  /*
+  * Doclifting: if we're a bold "-xx" and we're in the SYNOPSIS section,
+  * then it's likely that we're a flag.
+  * Our flag might be followed by an argument, so make sure that we're
+  * accounting for that, too.
+  * If we don't have a flag at all, however, then assume we're an "Ar".
+  * "buf" is the input, "*start" indexes the leading dash (asserted
+  * below) and "end" bounds the scan.
+  * Prints "Fl " followed by the flag text, or "Ar ", to standard output
+  * and moves "*start" as noted inline.
+  */
+ static void
+ dosynopsisfl(const char *buf, size_t *start, size_t end)
+ {
+         size_t   i;
+ again: /* re-entered for every further dash found after a flag and spaces */
+         assert(*start + 1 < end);
+         assert('-' == buf[*start]);
+         if ( ! isalnum((int)buf[*start + 1]) &&
+                 '?' != buf[*start + 1] &&
+                 '-' != buf[*start + 1]) {
+                 (*start)--; /* NOTE(review): steps back to before the dash; presumably the caller advances again -- confirm */
+                 fputs("Ar ", stdout);
+                 return;
+         }
+         (*start)++; /* skip the leading dash */
+         for (i = *start; i < end; i++) /* scan the flag body: alnum, question mark, dash, underscore */
+                 if (isalnum((int)buf[i]))
+                         continue;
+                 else if ('?' == buf[i])
+                         continue;
+                 else if ('-' == buf[i])
+                         continue;
+                 else if ('_' == buf[i])
+                         continue;
+                 else
+                         break;
+         assert(i < end); /* the scan must have stopped inside the buffer */
+         if ( ! (' ' == buf[i] || '>' == buf[i])) { /* body not ended by a space or the closing bracket: not a flag */
+                 printf("Ar ");
+                 return;
+         }
+         printf("Fl ");
+         if (end - *start > 1 &&
+                 isupper((int)buf[*start]) &&
+                 islower((int)buf[*start + 1]) &&
+                 (end - *start == 2 ||
+                  ' ' == buf[*start + 2]))
+                 printf("\\&"); /* NOTE(review): presumably keeps a two-letter flag such as "Ab" from being read as an mdoc macro name -- confirm */
+         printf("%.*s ", (int)(i - *start), &buf[*start]);
+         *start = i; /* resume at the character that ended the flag */
+         if (' ' == buf[i]) {
+                 while (i < end && ' ' == buf[i]) /* skip the spaces after the flag */
+                         i++;
+                 assert(i < end);
+                 if ('-' == buf[i]) { /* another dash follows: process it in turn */
+                         *start = i;
+                         goto again;
+                 }
+                 printf("Ar "); /* anything else after the flag is printed as its argument */
+                 *start = i;
+         }
+ }
+ /*
   * We're at the character in front of a format code, which is structured
   * like X<...> and can contain nested format codes.
   * This consumes the whole format code, and any nested format codes, til
-Line 248  trylink(const char *buf, size_t *start, size_t end, si
+Line 361  trylink(const char *buf, size_t *start, size_t end, si
 Line 248  trylink(const char *buf, size_t *start, size_t end, si
 Line 361  trylink(const char *buf, size_t *start, size_t end, si
   * been printed to the current line.
   * If "nomacro", then we don't print any macros, just contained data
   * (e.g., following "Sh" or "Nm").
+  * "pos" is only significant in SYNOPSIS, and should be 0 when invoked
+  * as the first format code on a line (for decoration as an "Nm"),
+  * non-zero otherwise.
   * Return whether we've printed a macro or not--in other words, whether
   * this should trigger a subsequent newline (this should be ignored when
   * reentrant).
   */
  static int
- formatcode(struct state *st, const char *buf,
+ formatcode(struct state *st, const char *buf, size_t *start,
-         size_t *start, size_t end, int reentrant, int nomacro)
+         size_t end, int reentrant, int nomacro, int pos)
  {
          enum fmt         fmt;
          size_t           i, j, dsz;
-Line 360  formatcode(struct state *st, const char *buf,
+Line 476  formatcode(struct state *st, const char *buf,
 Line 360  formatcode(struct state *st, const char *buf,
 Line 476  formatcode(struct state *st, const char *buf,
                          printf("Em ");
                          break;
                  case (FMT_BOLD):
-                         /*
+                         if (SECT_SYNOPSIS == st->sect) {
-                          * Doclifting: if we're a bold "-xx" and we're
+                                 if (1 == dsz && '-' == buf[*start])
-                          * in the SYNOPSIS section, then it's likely
+                                         dosynopsisfl(buf, start, end);
-                          * that we're a flag.
+                                 else if (0 == pos)
-                          * Be really strict: only do this when the dash
+                                         printf("Nm ");
-                          * is followed by alnums til the end marker,
+                                 else
-                          * which mustn't be a custom.
+                                         printf("Ar ");
-                          */
+                                 break;
-                         if (SECT_SYNOPSIS == st->sect &&
-                                 end - *start > 1 &&
-                                 '-' == buf[*start] &&
-                                 (isalnum((int)buf[*start + 1]) ||
-                                  '?' == buf[*start + 1])) {
-                                 for (i = *start + 1; i < end; i++)
-                                         if ( ! isalnum((int)buf[i]))
-                                                 break;
-                                 if (i < end && '>' == buf[i]) {
-                                         (*start)++;
-                                         printf("Fl ");
-                                         break;
-                                 }
                          }
                          printf("Sy ");
                          break;
-Line 388  formatcode(struct state *st, const char *buf,
+Line 491  formatcode(struct state *st, const char *buf,
 Line 388  formatcode(struct state *st, const char *buf,
 Line 491  formatcode(struct state *st, const char *buf,
                          printf("Qo Li ");
                          break;
                  case (FMT_LINK):
+                         /* Try to link; use "No" if it's empty. */
                          if ( ! trylink(buf, start, end, dsz))
                                  printf("No ");
                          break;
-Line 430  formatcode(struct state *st, const char *buf,
+Line 534  formatcode(struct state *st, const char *buf,
 Line 430  formatcode(struct state *st, const char *buf,
 Line 534  formatcode(struct state *st, const char *buf,
                          }
                  }
                  if (*start + 1 < end && '<' == buf[*start + 1]) {
-                         formatcode(st, buf, start, end, 1, nomacro);
+                         formatcode(st, buf, start, end, 1, nomacro, 1);
                          continue;
                  }
-Line 492  formatcodeln(struct state *st, const char *buf,
+Line 596  formatcodeln(struct state *st, const char *buf,
 Line 492  formatcodeln(struct state *st, const char *buf,
 Line 596  formatcodeln(struct state *st, const char *buf,
          last = ' ';
          while (*start < end)  {
                  if (*start + 1 < end && '<' == buf[*start + 1]) {
-                         formatcode(st, buf, start, end, 1, nomacro);
+                         formatcode(st, buf, start, end, 1, nomacro, 1);
                          continue;
                  }
                  /*
-Line 755  verbatim(struct state *st, const char *buf, size_t sta
+Line 859  verbatim(struct state *st, const char *buf, size_t sta
 Line 755  verbatim(struct state *st, const char *buf, size_t sta
 Line 859  verbatim(struct state *st, const char *buf, size_t sta
  }
  /*
+  * See dosynopsisop().
+  * Scan buf from "start" up to (not including) "end" for the closing
+  * square bracket that matches an opening bracket assumed to sit just
+  * before "start"; nested bracket pairs are skipped by counting them
+  * in "stack".
+  * Return 1 if such a closing bracket is found, 0 otherwise.
+  */
+ static int
+ hasmatch(const char *buf, size_t start, size_t end)
+ {
+         size_t   stack;
+         for (stack = 0; start < end; start++)
+                 if (buf[start] == '[') /* nested opening bracket */
+                         stack++;
+                 else if (buf[start] == ']' && 0 == stack) /* closing bracket at depth zero: this is the match */
+                         return(1);
+                 else if (buf[start] == ']') /* closes a nested pair */
+                         stack--;
+         return(0);
+ }
+ /*
+  * If we're in the SYNOPSIS section and we've encountered square
+  * brackets in an ordinary paragraph, then try to see whether we're an
+  * [-option].
+  * Do this, if we're an opening bracket, by first seeing if we have a
+  * matching end via hasmatch().
+  * If we're an ending bracket, see if we have a stack already.
+  * "*opstack" counts the ".Oo" blocks opened here and not yet closed;
+  * "*last" is compared against newline to decide whether a line break
+  * must be printed before the macro, and is set to newline afterwards.
+  * Return 1 if ".Oo" or ".Oc" was printed, in which case "*start" has
+  * been moved past the bracket and the spaces following it.
+  * Return 0, changing nothing, if the bracket has no match.
+  */
+ static int
+ dosynopsisop(const char *buf, int *last,
+         size_t *start, size_t end, size_t *opstack)
+ {
+         assert('[' == buf[*start] || ']' == buf[*start]);
+         if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) { /* open only when a closing bracket exists later */
+                 if ('\n' != *last)
+                         putchar('\n');
+                 puts(".Oo");
+                 (*opstack)++;
+         } else if ('[' == buf[*start]) /* opening bracket without a match */
+                 return(0);
+         if (']' == buf[*start] && *opstack > 0) { /* close only what was opened here */
+                 if ('\n' != *last)
+                         putchar('\n');
+                 puts(".Oc");
+                 (*opstack)--;
+         } else if (']' == buf[*start]) /* closing bracket with nothing open */
+                 return(0);
+         (*start)++; /* consume the bracket itself */
+         *last = '\n'; /* puts() above ended the output line */
+         while (' ' == buf[*start]) /* NOTE(review): not bounded by "end"; presumably a non-space byte always follows -- confirm */
+                 (*start)++;
+         return(1);
+ }
+ /*
+  * Format multiple "Nm" manpage names in the NAME section.
+  * The names are taken from buf between "*start" and "end" and are
+  * split at commas; each name gets its own ".Nm" line, and " ," is
+  * appended to every name that is followed by a comma.
+  * If only spaces are found, ".Nm unknown" is printed instead.
+  * "*start" is advanced as the names are consumed.
+  */
+ static void
+ donamenm(struct state *st, const char *buf, size_t *start, size_t end)
+ {
+         size_t   word;
+         while (*start < end && ' ' == buf[*start]) /* skip leading spaces */
+                 (*start)++;
+         if (end == *start) {
+                 puts(".Nm unknown");
+                 return;
+         }
+         while (*start < end) {
+                 fputs(".Nm ", stdout);
+                 for (word = *start; word < end; word++) /* find the comma ending this name, if any */
+                         if (',' == buf[word])
+                                 break;
+                 formatcodeln(st, buf, start, word, 1); /* NOTE(review): expected to advance "*start" up to "word"; the assert below relies on it */
+                 if (*start == end) { /* that was the last name */
+                         putchar('\n');
+                         continue; /* the loop condition is now false, so this leaves the loop */
+                 }
+                 assert(',' == buf[*start]);
+                 puts(" ,");
+                 (*start)++; /* step over the comma */
+                 while (*start < end && ' ' == buf[*start])
+                         (*start)++;
+         }
+ }
+ /*
   * Ordinary paragraph.
   * Well, this is really the hardest--POD seems to assume that, for
   * example, a leading space implies a newline, and so on.
-Line 765  verbatim(struct state *st, const char *buf, size_t sta
+Line 958  verbatim(struct state *st, const char *buf, size_t sta
 Line 765  verbatim(struct state *st, const char *buf, size_t sta
 Line 958  verbatim(struct state *st, const char *buf, size_t sta
  static void
  ordinary(struct state *st, const char *buf, size_t start, size_t end)
  {
-         size_t          i, j;
+         size_t          i, j, opstack;
+         int             seq;
          if ( ! st->parsing || st->paused)
                  return;
-Line 777  ordinary(struct state *st, const char *buf, size_t sta
+Line 971  ordinary(struct state *st, const char *buf, size_t sta
 Line 777  ordinary(struct state *st, const char *buf, size_t sta
 Line 971  ordinary(struct state *st, const char *buf, size_t sta
           * To wit, print out a "Nm" and "Nd" in that format.
           */
          if (SECT_NAME == st->sect) {
-                 for (i = end - 1; i > start; i--)
+                 for (i = end - 2; i > start; i--)
-                         if ('-' == buf[i])
+                         if ('-' == buf[i] && ' ' == buf[i + 1])
                                  break;
                  if ('-' == buf[i]) {
                          j = i;
-Line 786  ordinary(struct state *st, const char *buf, size_t sta
+Line 980  ordinary(struct state *st, const char *buf, size_t sta
 Line 786  ordinary(struct state *st, const char *buf, size_t sta
 Line 980  ordinary(struct state *st, const char *buf, size_t sta
                          for ( ; i > start; i--)
                                  if ('-' != buf[i])
                                          break;
-                         printf(".Nm ");
+                         donamenm(st, buf, &start, i + 1);
-                         formatcodeln(st, buf, &start, i + 1, 1);
-                         putchar('\n');
                          start = j + 1;
-                         printf(".Nd ");
+                         while (start < end && ' ' == buf[start])
+                                 start++;
+                         fputs(".Nd ", stdout);
                          formatcodeln(st, buf, &start, end, 1);
                          putchar('\n');
                          return;
-Line 802  ordinary(struct state *st, const char *buf, size_t sta
+Line 996  ordinary(struct state *st, const char *buf, size_t sta
 Line 802  ordinary(struct state *st, const char *buf, size_t sta
 Line 996  ordinary(struct state *st, const char *buf, size_t sta
          st->haspar = 0;
          last = '\n';
+         opstack = 0;
-         while (start < end) {
+         for (seq = 0; start < end; seq++) {
                  /*
                   * Loop til we get either to a newline or escape.
                   * Escape initial control characters.
-Line 817  ordinary(struct state *st, const char *buf, size_t sta
+Line 1012  ordinary(struct state *st, const char *buf, size_t sta
 Line 817  ordinary(struct state *st, const char *buf, size_t sta
 Line 1012  ordinary(struct state *st, const char *buf, size_t sta
                                  printf("\\&");
                          else if ('\n' == last && '\'' == buf[start])
                                  printf("\\&");
- #if notyet
                          /*
                           * If we're in the SYNOPSIS, have square
                           * brackets indicate that we're opening and
                           * closing an optional context.
                           */
-                         if (SECT_SYNOPSIS == st->sect) {
+                         if (SECT_SYNOPSIS == st->sect &&
-                                 if ('[' == buf[start] ||
+                                 ('[' == buf[start] ||
-                                         ']' == buf[start]) {
+                                  ']' == buf[start]) &&
-                                         if (last != '\n')
+                                 dosynopsisop(buf, &last,
-                                                 putchar('\n');
+                                         &start, end, &opstack))
-                                         if ('[' == buf[start])
+                                 continue;
-                                                 printf(".Oo\n");
-                                         else
-                                                 printf(".Oc\n");
-                                         start++;
-                                         continue;
-                                 }
-                         }
- #endif
                          putchar(last = buf[start++]);
                          if ('\\' == last)
                                  putchar('e');
-Line 851  ordinary(struct state *st, const char *buf, size_t sta
+Line 1037  ordinary(struct state *st, const char *buf, size_t sta
 Line 851  ordinary(struct state *st, const char *buf, size_t sta
 Line 1037  ordinary(struct state *st, const char *buf, size_t sta
                           * following that, a newline.
                           * Consume all whitespace so we don't
                           * accidentally start an implicit literal line.
+                          * If the macro ends with a flush comma or
+                          * period, let mdoc(7) handle it for us.
                           */
-                         if (formatcode(st, buf, &start, end, 0, 0)) {
+                         if (formatcode(st, buf, &start, end, 0, 0, seq)) {
+                                 if ((start == end - 1 ||
+                                         (start < end - 1 &&
+                                          (' ' == buf[start + 1] ||
+                                           '\n' == buf[start + 1]))) &&
+                                         ('.' == buf[start] ||
+                                          ',' == buf[start])) {
+                                         putchar(' ');
+                                         putchar(buf[start++]);
+                                 }
                                  putchar(last = '\n');
                                  while (start < end && ' ' == buf[start])
                                          start++;

CVSweb