pod2mdoc/pod2mdoc.c - diff

Return to pod2mdoc.c CVS log

Up to [cvsweb.bsd.lv] / pod2mdoc

Diff for /pod2mdoc/pod2mdoc.c between version 1.1 and 1.5

-version 1.1, 2014/03/20 15:07:56
+version 1.5, 2014/03/23 13:00:24
 Line 32  struct args {
 Line 32  struct args {
 Line 32  struct args {
          const char      *section; /* override "Dt" section */
  };
+ enum    list {
+         LIST_BULLET = 0,
+         LIST_ENUM,
+         LIST_TAG,
+         LIST__MAX
+ };
  struct  state {
          int              parsing; /* after =cut or before command */
          int              paused; /* in =begin and before =end */
          int              haspar; /* in paragraph: do we need Pp? */
          int              isname; /* are we the NAME section? */
          const char      *fname; /* file being parsed */
+ #define LIST_STACKSZ     128
+         enum list        lstack[LIST_STACKSZ]; /* open lists */
+         size_t           lpos; /* where in list stack */
  };
  enum    fmt {
-Line 144  formatescape(const char *buf, size_t *start, size_t en
+Line 154  formatescape(const char *buf, size_t *start, size_t en
 Line 144  formatescape(const char *buf, size_t *start, size_t en
 Line 154  formatescape(const char *buf, size_t *start, size_t en
  /*
   * Skip space characters.
   */
- static void
+ static int
  skipspace(const char *buf, size_t *start, size_t end)
  {
+         size_t           sv = *start;
          while (*start < end && ' ' == buf[*start])
                  (*start)++;
+         return(*start > sv);
  }
  /*
-Line 168  formatcode(const char *buf, size_t *start,
+Line 181  formatcode(const char *buf, size_t *start,
 Line 168  formatcode(const char *buf, size_t *start,
 Line 181  formatcode(const char *buf, size_t *start,
          size_t end, int reentrant, int last, int nomacro)
  {
          enum fmt         fmt;
+         size_t           i, j, dsz;
          assert(*start + 1 < end);
          assert('<' == buf[*start + 1]);
+         /*
+          * Determine whether we're overriding our delimiter.
+          * According to POD, if we have more than one '<' followed by a
+          * space, then we need a space followed by matching '>' to close
+          * the expression.
+          * Otherwise we use the usual '<' and '>' matched pair.
+          */
+         i = *start + 1;
+         while (i < end && '<' == buf[i])
+                 i++;
+         assert(i > *start + 1);
+         dsz = i - (*start + 1);
+         if (dsz > 1 && (i >= end || ' ' != buf[i]))
+                 dsz = 1;
          for (fmt = 0; fmt < FMT__MAX; fmt++)
                  if (buf[*start] == fmts[fmt])
                          break;
-Line 184  formatcode(const char *buf, size_t *start,
+Line 213  formatcode(const char *buf, size_t *start,
 Line 184  formatcode(const char *buf, size_t *start,
 Line 213  formatcode(const char *buf, size_t *start,
                  return(0);
          }
-         *start += 2;
+         /* Remember, if dsz>1, to jump the trailing space. */
+         *start += dsz + 1 + (dsz > 1 ? 1 : 0);
          /*
           * Escapes don't print macro sequences, so just output them like
-Line 194  formatcode(const char *buf, size_t *start,
+Line 224  formatcode(const char *buf, size_t *start,
 Line 194  formatcode(const char *buf, size_t *start,
 Line 224  formatcode(const char *buf, size_t *start,
                  formatescape(buf, start, end);
                  return(0);
          } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
-                 /* For indices and nulls, just consume. */
+                 /*
-                 while (*start < end && '>' != buf[*start])
+                  * For indices and nulls, just consume.
-                         (*start)++;
+                  * Be wary of encountering custom delimiters (dsz>1),
-                 if (*start < end)
+                  * which require special handling.
-                         (*start)++;
+                  */
+                 for ( ; *start < end; (*start)++) {
+                         if ('>' != buf[*start])
+                                 continue;
+                         else if (dsz == 1)
+                                 break;
+                         assert(*start > 0);
+                         if (' ' != buf[*start - 1])
+                                 continue;
+                         i = *start;
+                         for (j = 0; i < end && j < dsz; j++)
+                                 if ('>' != buf[i++])
+                                         break;
+                         if (dsz != j)
+                                 continue;
+                         (*start) += dsz;
+                         break;
+                 }
                  return(0);
          }
-Line 231  formatcode(const char *buf, size_t *start,
+Line 278  formatcode(const char *buf, size_t *start,
 Line 231  formatcode(const char *buf, size_t *start,
 Line 278  formatcode(const char *buf, size_t *start,
                          printf("Sy ");
                          break;
                  case (FMT_CODE):
-                         printf("Li ");
+                         printf("Qo Li ");
                          break;
                  case (FMT_LINK):
                          printf("Lk ");
-Line 249  formatcode(const char *buf, size_t *start,
+Line 296  formatcode(const char *buf, size_t *start,
 Line 249  formatcode(const char *buf, size_t *start,
 Line 296  formatcode(const char *buf, size_t *start,
          }
          /*
-          * Read until we reach the end market ('>') or until we find a
+          * Read until we reach the end marker (e.g., '>') or until we
-          * nested format code.
+          * find a nested format code.
           * Don't emit any newlines: since we're on a macro line, we
           * don't want to break the line.
           */
          while (*start < end) {
-                 if ('>' == buf[*start]) {
+                 if ('>' == buf[*start] && 1 == dsz) {
                          (*start)++;
                          break;
+                 } else if ('>' == buf[*start] &&
+                                 ' ' == buf[*start - 1]) {
+                         /*
+                          * Handle custom delimiters.
+                          * These require a certain number of
+                          * space-preceded carets before we're really at
+                          * the end.
+                          */
+                         i = *start;
+                         for (j = 0; i < end && j < dsz; j++)
+                                 if ('>' != buf[i++])
+                                         break;
+                         if (dsz == j) {
+                                 *start += dsz;
+                                 break;
+                         }
                  }
                  if (*start + 1 < end && '<' == buf[*start + 1]) {
                          formatcode(buf, start, end, 1, last, nomacro);
                          continue;
                  }
-                 if ('\n' != buf[*start]) {
-                         /*
+                 /*
-                          * Make sure that any macro-like words (or
+                  * Make sure that any macro-like words (or
-                          * really any word starting with a capital
+                  * really any word starting with a capital
-                          * letter) is assumed to be a macro that must be
+                  * letter) is assumed to be a macro that must be
-                          * escaped.
+                  * escaped.
-                          * XXX: should this be isalpha()?
+                  * This matches "Xx " and "XxEOLN".
-                          */
+                  */
-                         if ((' ' == last || '\n' == last) &&
+                 if ((' ' == last || '\n' == last) &&
-                                 isupper(buf[*start]))
+                                 end - *start > 1 &&
-                                 printf("\\&");
+                                 isupper((int)buf[*start]) &&
-                         putchar(last = buf[*start]);
+                                 islower((int)buf[*start + 1]) &&
-                 }
+                                 (end - *start == 2 ||
-                 (*start)++;
+                                  ' ' == buf[*start + 2]))
+                         printf("\\&");
+                 /* Suppress newline. */
+                 if ('\n' == (last = buf[(*start)++]))
+                         last = ' ';
+                 putchar(last);
          }
+         if ( ! nomacro && FMT_CODE == fmt)
+                 printf(" Qc ");
          if (reentrant)
                  return(1);
+         /* FIXME: with the "Qc", this doesn't work well. */
          /*
           * If we're not reentrant, we want to put ending punctuation on
           * the macro line so that it's properly handled by being
           * smooshed against the terminal word.
           */
          skipspace(buf, start, end);
          if (',' != buf[*start] && '.' != buf[*start] &&
                  '!' != buf[*start] && '?' != buf[*start] &&
                  ')' != buf[*start])
-Line 315  formatcodeln(const char *buf, size_t *start, size_t en
+Line 391  formatcodeln(const char *buf, size_t *start, size_t en
 Line 315  formatcodeln(const char *buf, size_t *start, size_t en
 Line 391  formatcodeln(const char *buf, size_t *start, size_t en
  {
          int              last;
-         last = '\n';
+         last = ' ';
          while (*start < end)  {
                  if (*start + 1 < end && '<' == buf[*start + 1]) {
                          formatcode(buf, start, end, 1, last, nomacro);
                          continue;
                  }
+                 /*
+                  * Since we're already on a macro line, we want to make
+                  * sure that we don't inadvertently invoke a macro.
+                  * We need to do this carefully because section names
+                  * are used in troff and we don't want to escape
+                  * something that needn't be escaped.
+                  */
+                 if (' ' == last && end - *start > 1 &&
+                                 isupper((int)buf[*start]) &&
+                                 islower((int)buf[*start + 1]) &&
+                                 (end - *start == 2 ||
+                                  ' ' == buf[*start + 2]))
+                         printf("\\&");
                  if ('\n' != buf[*start])
                          putchar(last = buf[*start]);
+                 else
+                         putchar(last = ' ');
                  (*start)++;
          }
  }
  /*
+  * Guess at what kind of list we are.
+  * These are taken straight from the POD manual.
+  * I don't know what people do in real life.
+  */
+ static enum list
+ listguess(const char *buf, size_t start, size_t end)
+ {
+         size_t           len = end - start;
+         assert(end >= start);
+         if (len == 1 && '*' == buf[start])
+                 return(LIST_BULLET);
+         if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
+                 return(LIST_ENUM);
+         else if (len == 1 && '1' == buf[start])
+                 return(LIST_ENUM);
+         else
+                 return(LIST_TAG);
+ }
+ /*
   * A command paragraph, as noted in the perlpod manual, just indicates
   * that we should do something, optionally with some text to print as
   * well.
-Line 404  command(struct state *st, const char *buf, size_t star
+Line 518  command(struct state *st, const char *buf, size_t star
 Line 404  command(struct state *st, const char *buf, size_t star
 Line 518  command(struct state *st, const char *buf, size_t star
                  st->haspar = 1;
                  break;
          case (CMD_OVER):
                  /*
-                  * TODO: we should be doing this after we process the
+                  * If we have an existing list that hasn't had an =item
-                  * first =item to see whether we'll do an -enum,
+                  * yet, then make sure that we open it now.
-                  * -bullet, or something else.
+                  * We use the default list type, but that can't be
+                  * helped (we haven't seen any items yet).
                   */
-                 puts(".Bl -tag -width Ds");
+                 if (st->lpos > 0)
+                         if (LIST__MAX == st->lstack[st->lpos - 1]) {
+                                 st->lstack[st->lpos - 1] = LIST_TAG;
+                                 puts(".Bl -tag -width Ds");
+                         }
+                 st->lpos++;
+                 assert(st->lpos < LIST_STACKSZ);
+                 st->lstack[st->lpos - 1] = LIST__MAX;
                  break;
          case (CMD_ITEM):
-                 printf(".It ");
+                 assert(st->lpos > 0);
-                 formatcodeln(buf, &start, end, 0);
+                 /*
-                 putchar('\n');
+                  * If we're the first =item, guess at what our content
+                  * will be: "*" is a bullet list, "1." is a numbered
+                  * list, and everything else is tagged.
+                  */
+                 if (LIST__MAX == st->lstack[st->lpos - 1]) {
+                         st->lstack[st->lpos - 1] =
+                                 listguess(buf, start, end);
+                         switch (st->lstack[st->lpos - 1]) {
+                         case (LIST_BULLET):
+                                 puts(".Bl -bullet");
+                                 break;
+                         case (LIST_ENUM):
+                                 puts(".Bl -enum");
+                                 break;
+                         default:
+                                 puts(".Bl -tag -width Ds");
+                                 break;
+                         }
+                 }
+                 switch (st->lstack[st->lpos - 1]) {
+                 case (LIST_TAG):
+                         printf(".It ");
+                         formatcodeln(buf, &start, end, 0);
+                         putchar('\n');
+                         break;
+                 case (LIST_ENUM):
+                         /* FALLTHROUGH */
+                 case (LIST_BULLET):
+                         /*
+                          * Abandon the remainder of the paragraph
+                          * because we're going to be a bulleted or
+                          * numbered list.
+                          */
+                         puts(".It");
+                         break;
+                 default:
+                         abort();
+                 }
                  st->haspar = 1;
                  break;
          case (CMD_BACK):
-                 puts(".El");
+                 /* Make sure we don't back over the stack. */
+                 if (st->lpos > 0) {
+                         st->lpos--;
+                         puts(".El");
+                 }
                  break;
          case (CMD_BEGIN):
                  /*
-Line 494  ordinary(struct state *st, const char *buf, size_t sta
+Line 657  ordinary(struct state *st, const char *buf, size_t sta
 Line 494  ordinary(struct state *st, const char *buf, size_t sta
 Line 657  ordinary(struct state *st, const char *buf, size_t sta
                          for ( ; i > start; i--)
                                  if ('-' != buf[i])
                                          break;
-                         printf(".Nm %.*s\n",
+                         printf(".Nm ");
-                                 (int)((i + 1) - start), &buf[start]);
+                         formatcodeln(buf, &start, i + 1, 1);
-                         printf(".Nd %.*s\n",
+                         putchar('\n');
-                                 (int)(end - (j + 1)), &buf[j + 1]);
+                         start = j + 1;
+                         printf(".Nd ");
+                         formatcodeln(buf, &start, end, 1);
+                         putchar('\n');
                          return;
                  }
          }

CVSweb