pod2mdoc/pod2mdoc.c - diff

Return to pod2mdoc.c CVS log

Up to [cvsweb.bsd.lv] / pod2mdoc

Diff for /pod2mdoc/pod2mdoc.c between version 1.15 and 1.29

-version 1.15, 2014/04/02 14:50:09
+version 1.29, 2014/07/11 20:45:55
 Line 27
 Line 27
 Line 27
  #include <unistd.h>
  /*
-  * In what section can we find Perl manuals?
+  * In what section can we find Perl module manuals?
+  * Sometimes (Mac OS X) it's 3pm, sometimes (OpenBSD, etc.) 3p.
+  * XXX IF YOU CHANGE THIS, CHANGE POD2MDOC.1 AS WELL.
   */
  #define PERL_SECTION    "3p"
-Line 170  formatescape(const char *buf, size_t *start, size_t en
+Line 172  formatescape(const char *buf, size_t *start, size_t en
 Line 170  formatescape(const char *buf, size_t *start, size_t en
 Line 172  formatescape(const char *buf, size_t *start, size_t en
  /*
   * Run some heuristics to intuit a link format.
-  * I recognise L<foo::bar> as a Perl manpage, printing it in section 3p;
+  * I set "start" to be the end of the sequence (last right angle bracket) so
-  * or a general UNIX foo(5) manpage.
-  * If I recognise one, I set "start" to be the end of the sequence so
   * that the caller can safely just continue processing.
-  * Otherwise, I don't touch "start".
+  * If this is just an empty tag, I'll return 0.
   */
  static int
  trylink(const char *buf, size_t *start, size_t end, size_t dsz)
  {
-         size_t          sv, nstart, nend, i, j;
+         size_t           linkstart, realend, linkend,
-         int             hasdouble;
+                          i, j, textsz, stack;
          /*
           * Scan to the start of the terminus.
           * This function is more or less replicated in the formatcode()
           * for null or index formatting codes.
+          * However, we're slightly different because we might have
+          * nested escapes we need to ignore.
           */
-         hasdouble = 0;
+         stack = 0;
-         for (sv = nstart = *start; nstart < end; nstart++) {
+         for (linkstart = realend = *start; realend < end; realend++) {
-                 /* Do we have a double-colon? */
+                 if ('<' == buf[realend])
-                 if (':' == buf[nstart] &&
+                         stack++;
-                         nstart > sv &&
+                 if ('>' != buf[realend])
-                         ':' == buf[nstart - 1])
-                         hasdouble = 1;
-                 if ('>' != buf[nstart])
                          continue;
-                 else if (dsz == 1)
+                 else if (stack-- > 0)
+                         continue;
+                 if (dsz == 1)
                          break;
-                 assert(nstart > 0);
+                 assert(realend > 0);
-                 if (' ' != buf[nstart - 1])
+                 if (' ' != buf[realend - 1])
                          continue;
-                 i = nstart;
+                 for (i = realend, j = 0; i < end && j < dsz; j++)
-                 for (j = 0; i < end && j < dsz; j++)
                          if ('>' != buf[i++])
                                  break;
                  if (dsz == j)
                          break;
          }
-         /* We don't care about stubs. */
+         /* Ignore stubs. */
-         if (nstart == end || nstart == *start)
+         if (realend == end || realend == *start)
                  return(0);
-         /* Set nend to the end of content. */
+         /* Set linkend to the end of content. */
-         nend = nstart;
+         linkend = dsz > 1 ? realend - 1 : realend;
-         if (dsz > 1)
-                 nend--;
-         /*
+         /* Re-scan to see if we have a title or section. */
-          * Provide for some common invocations of the link primitive.
+         for (textsz = *start; textsz < linkend; textsz++)
-          * First, allow us to link to other Perl manuals.
+                 if ('|' == buf[textsz] || '/' == buf[textsz])
-          */
+                         break;
-         if (hasdouble)
+         if (textsz < linkend && '|' == buf[textsz]) {
+                 /* With title: set start, then end at section. */
+                 linkstart = textsz + 1;
+                 textsz = textsz - *start;
+                 for (i = linkstart; i < linkend; i++)
+                         if ('/' == buf[i])
+                                 break;
+                 if (i < linkend)
+                         linkend = i;
+         } else if (textsz < linkend && '/' == buf[textsz]) {
+                 /* With section: set end at section. */
+                 linkend = textsz;
+                 textsz = 0;
+         } else
+                 /* No title, no section. */
+                 textsz = 0;
+         *start = realend;
+         j = linkend - linkstart;
+         /* Do we have only subsection material? */
+         if (0 == j && '/' == buf[linkend]) {
+                 linkstart = linkend + 1;
+                 linkend = dsz > 1 ? realend - 1 : realend;
+                 if (0 == (j = linkend - linkstart))
+                         return(0);
+                 printf("Sx %.*s", (int)j, &buf[linkstart]);
+                 return(1);
+         } else if (0 == j)
+                 return(0);
+         /* See if we qualify as being a link or not. */
+         if ((j > 4 && 0 == memcmp("http:", &buf[linkstart], j)) ||
+                 (j > 5 && 0 == memcmp("https:", &buf[linkstart], j)) ||
+                 (j > 3 && 0 == memcmp("ftp:", &buf[linkstart], j)) ||
+                 (j > 4 && 0 == memcmp("sftp:", &buf[linkstart], j)) ||
+                 (j > 3 && 0 == memcmp("smb:", &buf[linkstart], j)) ||
+                 (j > 3 && 0 == memcmp("afs:", &buf[linkstart], j))) {
+                 /* Gross. */
+                 printf("Lk %.*s", (int)((dsz > 1 ? realend - 1 :
+                         realend) - linkstart), &buf[linkstart]);
+                 return(1);
+         }
+         /* See if we qualify as a mailto. */
+         if (j > 6 && 0 == memcmp("mailto:", &buf[linkstart], j)) {
+                 printf("Mt %.*s", (int)j, &buf[linkstart]);
+                 return(1);
+         }
+         /* See if we're a foo(5), foo(5x), or foo(5xx) manpage. */
+         if ((j > 3 && ')' == buf[linkend - 1]) &&
+                 ('(' == buf[linkend - 3])) {
+                 printf("Xr %.*s %c", (int)(j - 3),
+                         &buf[linkstart], buf[linkend - 2]);
+                 return(1);
+         } else if ((j > 4 && ')' == buf[linkend - 1]) &&
+                 ('(' == buf[linkend - 4])) {
+                 printf("Xr %.*s %.*s", (int)(j - 4),
+                         &buf[linkstart], 2, &buf[linkend - 3]);
+                 return(1);
+         } else if ((j > 5 && ')' == buf[linkend - 1]) &&
+                 ('(' == buf[linkend - 5])) {
+                 printf("Xr %.*s %.*s", (int)(j - 5),
+                         &buf[linkstart], 3, &buf[linkend - 4]);
+                 return(1);
+         }
+         /* Last try: do we have a double-colon? */
+         for (i = linkstart + 1; i < linkend; i++)
+                 if (':' == buf[i] && ':' == buf[i - 1])
+                         break;
+         if (i < linkend)
                  printf("Xr %.*s " PERL_SECTION,
-                         (int)(nend - sv), &buf[sv]);
+                         (int)j, &buf[linkstart]);
-         else if (nend - sv > 3 && isalnum(buf[sv]) &&
-                         ')' == buf[nend - 1] &&
-                         isdigit((int)buf[nend - 2]) &&
-                         '(' == buf[nend - 3])
-                 printf("Xr %.*s %c",
-                         (int)(nend - 3 - sv),
-                         &buf[sv], buf[nend - 2]);
          else
-                 return(0);
+                 printf("Xr %.*s 1", (int)j, &buf[linkstart]);
-         *start = nstart;
          return(1);
  }
-Line 394  formatcode(struct state *st, const char *buf, size_t *
+Line 458  formatcode(struct state *st, const char *buf, size_t *
 Line 394  formatcode(struct state *st, const char *buf, size_t *
 Line 458  formatcode(struct state *st, const char *buf, size_t *
                          (*start) += dsz;
                          break;
                  }
+                 if (*start < end) {
+                         assert('>' == buf[*start]);
+                         (*start)++;
+                 }
+                 if (isspace(last))
+                         while (*start < end && isspace((int)buf[*start]))
+                                 (*start)++;
                  return(0);
          }
-Line 438  formatcode(struct state *st, const char *buf, size_t *
+Line 509  formatcode(struct state *st, const char *buf, size_t *
 Line 438  formatcode(struct state *st, const char *buf, size_t *
 Line 509  formatcode(struct state *st, const char *buf, size_t *
                                          printf("Ar ");
                                  break;
                          }
-                         printf("Sy ");
+                         if (0 == strncmp(buf + *start, "NULL", 4) &&
+                             ('=' == buf[*start + 4] ||
+                              '>' == buf[*start + 4]))
+                                 printf("Dv ");
+                         else
+                                 printf("Sy ");
                          break;
                  case (FMT_CODE):
                          printf("Qo Li ");
                          break;
                  case (FMT_LINK):
+                         /* Try to link; use "No" if it's empty. */
                          if ( ! trylink(buf, start, end, dsz))
                                  printf("No ");
                          break;
-Line 789  static void
+Line 866  static void
 Line 789  static void
 Line 866  static void
  verbatim(struct state *st, const char *buf, size_t start, size_t end)
  {
          int              last;
+         size_t           i;
          if ( ! st->parsing || st->paused)
                  return;
+ again:
+         /*
+          * If we're in the SYNOPSIS, see if we're an #include block.
+          * If we are, then print the "In" macro and re-loop.
+          * This handles any number of inclusions, but only when they
+          * come before the remaining parts...
+          */
+         if (SECT_SYNOPSIS == st->sect) {
+                 i = start;
+                 for (i = start; i < end && ' ' == buf[i]; i++)
+                         /* Spin. */ ;
+                 if (i == end)
+                         return;
+                 /* We're an include block! */
+                 if (end - i > 10 &&
+                         0 == memcmp(&buf[i], "#include <", 10)) {
+                         start = i + 10;
+                         while (start < end && ' ' == buf[start])
+                                 start++;
+                         fputs(".In ", stdout);
+                         /* Stop at the '>' marker or when we hit eoln. */
+                         while (start < end &&
+                                 '>' != buf[start] && '\n' != buf[start])
+                                 putchar(buf[start++]);
+                         putchar('\n');
+                         if (start < end && '>' == buf[start])
+                                 start++;
+                         if (start < end && '\n' == buf[start])
+                                 start++;
+                         if (start < end)
+                                 goto again;
+                         return;
+                 }
+         }
+         if (start == end)
+                 return;
          puts(".Bd -literal");
          for (last = ' '; start < end; start++) {
                  /*
-Line 866  dosynopsisop(const char *buf, int *last,
+Line 980  dosynopsisop(const char *buf, int *last,
 Line 866  dosynopsisop(const char *buf, int *last,
 Line 980  dosynopsisop(const char *buf, int *last,
  }
  /*
+  * Format multiple "Nm" manpage names in the NAME section.
+  */
+ static void
+ donamenm(struct state *st, const char *buf, size_t *start, size_t end)
+ {
+         size_t   word;
+         while (*start < end && ' ' == buf[*start])
+                 (*start)++;
+         if (end == *start) {
+                 puts(".Nm unknown");
+                 return;
+         }
+         while (*start < end) {
+                 fputs(".Nm ", stdout);
+                 for (word = *start; word < end; word++)
+                         if (',' == buf[word])
+                                 break;
+                 formatcodeln(st, buf, start, word, 1);
+                 if (*start == end) {
+                         putchar('\n');
+                         continue;
+                 }
+                 assert(',' == buf[*start]);
+                 puts(" ,");
+                 (*start)++;
+                 while (*start < end && ' ' == buf[*start])
+                         (*start)++;
+         }
+ }
+ /*
   * Ordinary paragraph.
   * Well, this is really the hardest--POD seems to assume that, for
   * example, a leading space implies a newline, and so on.
-Line 898  ordinary(struct state *st, const char *buf, size_t sta
+Line 1046  ordinary(struct state *st, const char *buf, size_t sta
 Line 898  ordinary(struct state *st, const char *buf, size_t sta
 Line 1046  ordinary(struct state *st, const char *buf, size_t sta
                          for ( ; i > start; i--)
                                  if ('-' != buf[i])
                                          break;
-                         fputs(".Nm ", stdout);
+                         donamenm(st, buf, &start, i + 1);
-                         formatcodeln(st, buf, &start, i + 1, 1);
-                         putchar('\n');
                          start = j + 1;
+                         while (start < end && ' ' == buf[start])
+                                 start++;
                          fputs(".Nd ", stdout);
                          formatcodeln(st, buf, &start, end, 1);
                          putchar('\n');
-Line 955  ordinary(struct state *st, const char *buf, size_t sta
+Line 1103  ordinary(struct state *st, const char *buf, size_t sta
 Line 955  ordinary(struct state *st, const char *buf, size_t sta
 Line 1103  ordinary(struct state *st, const char *buf, size_t sta
                           * following that, a newline.
                           * Consume all whitespace so we don't
                           * accidentally start an implicit literal line.
+                          * If the macro ends with a flush comma or
+                          * period, let mdoc(7) handle it for us.
                           */
                          if (formatcode(st, buf, &start, end, 0, 0, seq)) {
+                                 if ((start == end - 1 ||
+                                         (start < end - 1 &&
+                                          (' ' == buf[start + 1] ||
+                                           '\n' == buf[start + 1]))) &&
+                                         ('.' == buf[start] ||
+                                          ',' == buf[start])) {
+                                         putchar(' ');
+                                         putchar(buf[start++]);
+                                 }
                                  putchar(last = '\n');
                                  while (start < end && ' ' == buf[start])
                                          start++;
-Line 1019  static void
+Line 1178  static void
 Line 1019  static void
 Line 1178  static void
  dofile(const struct args *args, const char *fname,
          const struct tm *tm, const char *buf, size_t sz)
  {
-         size_t           sup, end, i, cur = 0;
-         struct state     st;
-         const char      *section, *date;
          char             datebuf[64];
+         struct state     st;
+         const char      *fbase, *fext, *section, *date;
          char            *title, *cp;
+         size_t           sup, end, i, cur = 0;
          if (0 == sz)
                  return;
-         /* Title is last path component of the filename. */
+         /*
+          * Parsing the filename is almost always required,
+          * except when both the title and the section
+          * are provided on the command line.
+          */
-         if (NULL != args->title)
+         if (NULL == args->title || NULL == args->section) {
-                 title = strdup(args->title);
+                 fbase = strrchr(fname, '/');
-         else if (NULL != (cp = strrchr(fname, '/')))
+                 if (NULL == fbase)
-                 title = strdup(cp + 1);
+                         fbase = fname;
-         else
+                 else
-                 title = strdup(fname);
+                         fbase++;
+                 fext = strrchr(fbase, '.');
+         } else
+                 fext = NULL;
+         /*
+          * The title will be converted to uppercase,
+          * so it needs to be copied.
+          */
+         title = (NULL != args->title) ? strdup(args->title) :
+                 (NULL != fext) ? strndup(fbase, fext - fbase) :
+                 strdup(fbase);
          if (NULL == title) {
                  perror(NULL);
                  exit(EXIT_FAILURE);
-Line 1044  dofile(const struct args *args, const char *fname,
+Line 1219  dofile(const struct args *args, const char *fname,
 Line 1044  dofile(const struct args *args, const char *fname,
 Line 1219  dofile(const struct args *args, const char *fname,
          /* Section is 1 unless suffix is "pm". */
-         if (NULL == (section = args->section)) {
+         section = (NULL != args->section) ? args->section :
-                 section = "1";
+             (NULL == fext || strcmp(fext + 1, "pm")) ? "1" :
-                 if (NULL != (cp = strrchr(title, '.'))) {
+             PERL_SECTION;
-                         *cp++ = '\0';
-                         if (0 == strcmp(cp, "pm"))
-                                 section = PERL_SECTION;
-                 }
-         }
          /* Date.  Or the given "tm" if not supplied. */
-Line 1114  readfile(const struct args *args, const char *fname)
+Line 1284  readfile(const struct args *args, const char *fname)
 Line 1114  readfile(const struct args *args, const char *fname)
 Line 1284  readfile(const struct args *args, const char *fname)
          time_t           ttm;
          struct stat      st;
-         assert(NULL != fname);
          fd = 0 != strcmp("-", fname) ?
                  open(fname, O_RDONLY, 0) : STDIN_FILENO;
-Line 1221  main(int argc, char *argv[])
+Line 1389  main(int argc, char *argv[])
 Line 1221  main(int argc, char *argv[])
 Line 1389  main(int argc, char *argv[])
          /* Accept only a single input file. */
-         if (argc > 2)
+         if (argc > 1)
-                 return(EXIT_FAILURE);
+                 goto usage;
          else if (1 == argc)
                  fname = *argv;
-Line 1231  main(int argc, char *argv[])
+Line 1399  main(int argc, char *argv[])
 Line 1231  main(int argc, char *argv[])
 Line 1399  main(int argc, char *argv[])
  usage:
          fprintf(stderr, "usage: %s [-d date] "
-                 "[-n title] [-s section]\n", name);
+             "[-n title] [-s section] [file]\n", name);
          return(EXIT_FAILURE);
  }

CVSweb