mandoc/mandoc.c - diff

Return to mandoc.c CVS log

Up to [cvsweb.bsd.lv] / mandoc

Diff for /mandoc/mandoc.c between version 1.63 and 1.69

-version 1.63, 2012/05/31 22:29:13
+version 1.69, 2013/10/05 20:30:05
 Line 1
 Line 1
 Line 1
  /*      $Id$ */
  /*
   * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
-  * Copyright (c) 2011, 2012 Ingo Schwarze <schwarze@openbsd.org>
+  * Copyright (c) 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
   *
   * Permission to use, copy, modify, and distribute this software for any
   * purpose with or without fee is hereby granted, provided that the above
 Line 37
 Line 37
 Line 37
  static  int      a2time(time_t *, const char *, const char *);
  static  char    *time2a(time_t);
- static  int      numescape(const char *);
- /*
-  * Pass over recursive numerical expressions.  The context of this
+ enum mandoc_esc
-  * function is important: it's only called within character-terminating
+ mandoc_escape(const char const **end, const char const **start, int *sz)
-  * escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial
-  * recursion: we don't care about what's in these blocks.
-  * This returns the number of characters skipped or -1 if an error
-  * occurs (the caller should bail).
-  */
- static int
- numescape(const char *start)
  {
-         int              i;
+         const char      *local_start;
-         size_t           sz;
+         int              local_sz;
-         const char      *cp;
+         char             term;
+         enum mandoc_esc  gly;
-         i = 0;
+         /*
+          * When the caller doesn't provide return storage,
+          * use local storage.
+          */
-         /* The expression consists of a subexpression. */
+         if (NULL == start)
+                 start = &local_start;
+         if (NULL == sz)
+                 sz = &local_sz;
-         if ('\\' == start[i]) {
-                 cp = &start[++i];
-                 /*
-                  * Read past the end of the subexpression.
-                  * Bail immediately on errors.
-                  */
-                 if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
-                         return(-1);
-                 return(i + cp - &start[i]);
-         }
-         if ('(' != start[i++])
-                 return(0);
          /*
-          * A parenthesised subexpression.  Read until the closing
+          * Beyond the backslash, at least one input character
-          * parenthesis, making sure to handle any nested subexpressions
+          * is part of the escape sequence.  With one exception
-          * that might ruin our parse.
+          * (see below), that character won't be returned.
           */
-         while (')' != start[i]) {
-                 sz = strcspn(&start[i], ")\\");
-                 i += (int)sz;
-                 if ('\0' == start[i])
-                         return(-1);
-                 else if ('\\' != start[i])
-                         continue;
-                 cp = &start[++i];
-                 if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
-                         return(-1);
-                 i += cp - &start[i];
-         }
-         /* Read past the terminating ')'. */
-         return(++i);
- }
- enum mandoc_esc
- mandoc_escape(const char **end, const char **start, int *sz)
- {
-         char             c, term, numeric;
-         int              i, lim, ssz, rlim;
-         const char      *cp, *rstart;
-         enum mandoc_esc  gly;
-         cp = *end;
-         rstart = cp;
-         if (start)
-                 *start = rstart;
-         i = lim = 0;
          gly = ESCAPE_ERROR;
-         term = numeric = '\0';
+         *start = ++*end;
+         *sz = 0;
+         term = '\0';
-         switch ((c = cp[i++])) {
+         switch ((*start)[-1]) {
          /*
           * First the glyphs.  There are several different forms of
           * these, but each eventually returns a substring of the glyph
-Line 121  mandoc_escape(const char **end, const char **start, in
+Line 76  mandoc_escape(const char **end, const char **start, in
 Line 121  mandoc_escape(const char **end, const char **start, in
 Line 76  mandoc_escape(const char **end, const char **start, in
           */
          case ('('):
                  gly = ESCAPE_SPECIAL;
-                 lim = 2;
+                 *sz = 2;
                  break;
          case ('['):
                  gly = ESCAPE_SPECIAL;
-Line 131  mandoc_escape(const char **end, const char **start, in
+Line 86  mandoc_escape(const char **end, const char **start, in
 Line 131  mandoc_escape(const char **end, const char **start, in
 Line 86  mandoc_escape(const char **end, const char **start, in
                   * Unicode codepoint.  Here, however, only check whether
                   * it's not a zero-width escape.
                   */
-                 if ('u' == cp[i] && ']' != cp[i + 1])
+                 if ('u' == (*start)[0] && ']' != (*start)[1])
                          gly = ESCAPE_UNICODE;
                  term = ']';
                  break;
          case ('C'):
-                 if ('\'' != cp[i])
+                 if ('\'' != **start)
                          return(ESCAPE_ERROR);
                  gly = ESCAPE_SPECIAL;
+                 *start = ++*end;
                  term = '\'';
                  break;
-Line 149  mandoc_escape(const char **end, const char **start, in
+Line 105  mandoc_escape(const char **end, const char **start, in
 Line 149  mandoc_escape(const char **end, const char **start, in
 Line 105  mandoc_escape(const char **end, const char **start, in
           * let us just skip the next character.
           */
          case ('z'):
-                 (*end)++;
                  return(ESCAPE_SKIPCHAR);
          /*
-Line 176  mandoc_escape(const char **end, const char **start, in
+Line 131  mandoc_escape(const char **end, const char **start, in
 Line 176  mandoc_escape(const char **end, const char **start, in
 Line 131  mandoc_escape(const char **end, const char **start, in
          case ('f'):
                  if (ESCAPE_ERROR == gly)
                          gly = ESCAPE_FONT;
+                 switch (**start) {
-                 rstart= &cp[i];
-                 if (start)
-                         *start = rstart;
-                 switch (cp[i++]) {
                  case ('('):
-                         lim = 2;
+                         *start = ++*end;
+                         *sz = 2;
                          break;
                  case ('['):
+                         *start = ++*end;
                          term = ']';
                          break;
                  default:
-                         lim = 1;
+                         *sz = 1;
-                         i--;
                          break;
                  }
                  break;
-Line 212  mandoc_escape(const char **end, const char **start, in
+Line 163  mandoc_escape(const char **end, const char **start, in
 Line 212  mandoc_escape(const char **end, const char **start, in
 Line 163  mandoc_escape(const char **end, const char **start, in
          case ('X'):
                  /* FALLTHROUGH */
          case ('Z'):
-                 if ('\'' != cp[i++])
+                 if ('\'' != **start)
                          return(ESCAPE_ERROR);
                  gly = ESCAPE_IGNORE;
+                 *start = ++*end;
                  term = '\'';
                  break;
-Line 240  mandoc_escape(const char **end, const char **start, in
+Line 192  mandoc_escape(const char **end, const char **start, in
 Line 240  mandoc_escape(const char **end, const char **start, in
 Line 192  mandoc_escape(const char **end, const char **start, in
          case ('w'):
                  /* FALLTHROUGH */
          case ('x'):
+                 if ('\'' != **start)
+                         return(ESCAPE_ERROR);
                  if (ESCAPE_ERROR == gly)
                          gly = ESCAPE_IGNORE;
-                 if ('\'' != cp[i++])
+                 *start = ++*end;
-                         return(ESCAPE_ERROR);
+                 term = '\'';
-                 term = numeric = '\'';
                  break;
          /*
-Line 252  mandoc_escape(const char **end, const char **start, in
+Line 205  mandoc_escape(const char **end, const char **start, in
 Line 252  mandoc_escape(const char **end, const char **start, in
 Line 205  mandoc_escape(const char **end, const char **start, in
           * XXX Do any other escapes need similar handling?
           */
          case ('N'):
-                 if ('\0' == cp[i])
+                 if ('\0' == **start)
                          return(ESCAPE_ERROR);
-                 *end = &cp[++i];
+                 (*end)++;
-                 if (isdigit((unsigned char)cp[i-1]))
+                 if (isdigit((unsigned char)**start)) {
+                         *sz = 1;
                          return(ESCAPE_IGNORE);
+                 }
+                 (*start)++;
                  while (isdigit((unsigned char)**end))
                          (*end)++;
-                 if (start)
+                 *sz = *end - *start;
-                         *start = &cp[i];
-                 if (sz)
-                         *sz = *end - &cp[i];
                  if ('\0' != **end)
                          (*end)++;
                  return(ESCAPE_NUMBERED);
-Line 273  mandoc_escape(const char **end, const char **start, in
+Line 226  mandoc_escape(const char **end, const char **start, in
 Line 273  mandoc_escape(const char **end, const char **start, in
 Line 226  mandoc_escape(const char **end, const char **start, in
          case ('s'):
                  gly = ESCAPE_IGNORE;
-                 rstart = &cp[i];
-                 if (start)
-                         *start = rstart;
                  /* Note that a leading +/- counts as a sign. */
-                 c = cp[i];
+                 if ('+' == **end || '-' == **end || ASCII_HYPH == **end)
-                 if ('+' == c || '-' == c || ASCII_HYPH == c)
+                         (*end)++;
-                         ++i;
-                 switch (cp[i++]) {
+                 switch (**end) {
                  case ('('):
-                         lim = 2;
+                         *start = ++*end;
+                         *sz = 2;
                          break;
                  case ('['):
-                         term = numeric = ']';
+                         *start = ++*end;
+                         term = ']';
                          break;
                  case ('\''):
-                         term = numeric = '\'';
+                         *start = ++*end;
+                         term = '\'';
                          break;
                  default:
-                         lim = 1;
+                         *sz = 1;
-                         i--;
                          break;
                  }
-                 /* See +/- counts as a sign. */
-                 c = cp[i];
-                 if ('+' == c || '-' == c || ASCII_HYPH == c)
-                         ++i;
                  break;
          /*
           * Anything else is assumed to be a glyph.
+          * In this case, pass back the character after the backslash.
           */
          default:
                  gly = ESCAPE_SPECIAL;
-                 lim = 1;
+                 *start = --*end;
-                 i--;
+                 *sz = 1;
                  break;
          }
          assert(ESCAPE_ERROR != gly);
-         rstart = &cp[i];
-         if (start)
-                 *start = rstart;
          /*
-          * If a terminating block has been specified, we need to
+          * Read up to the terminating character,
-          * handle the case of recursion, which could have their
+          * paying attention to nested escapes.
-          * own terminating blocks that mess up our parse.  This, by the
-          * way, means that the "start" and "size" values will be
-          * effectively meaningless.
           */
-         ssz = 0;
-         if (numeric && -1 == (ssz = numescape(&cp[i])))
-                 return(ESCAPE_ERROR);
-         i += ssz;
-         rlim = -1;
-         /*
-          * We have a character terminator.  Try to read up to that
-          * character.  If we can't (i.e., we hit the nil), then return
-          * an error; if we can, calculate our length, read past the
-          * terminating character, and exit.
-          */
          if ('\0' != term) {
-                 *end = strchr(&cp[i], term);
+                 while (**end != term) {
-                 if ('\0' == *end)
+                         switch (**end) {
+                         case ('\0'):
+                                 return(ESCAPE_ERROR);
+                         case ('\\'):
+                                 (*end)++;
+                                 if (ESCAPE_ERROR ==
+                                     mandoc_escape(end, NULL, NULL))
+                                         return(ESCAPE_ERROR);
+                                 break;
+                         default:
+                                 (*end)++;
+                                 break;
+                         }
+                 }
+                 *sz = (*end)++ - *start;
+         } else {
+                 assert(*sz > 0);
+                 if ((size_t)*sz > strlen(*start))
                          return(ESCAPE_ERROR);
+                 *end += *sz;
-                 rlim = *end - &cp[i];
-                 if (sz)
-                         *sz = rlim;
-                 (*end)++;
-                 goto out;
          }
-         assert(lim > 0);
-         /*
-          * We have a numeric limit.  If the string is shorter than that,
-          * stop and return an error.  Else adjust our endpoint, length,
-          * and return the current glyph.
-          */
-         if ((size_t)lim > strlen(&cp[i]))
-                 return(ESCAPE_ERROR);
-         rlim = lim;
-         if (sz)
-                 *sz = rlim;
-         *end = &cp[i] + lim;
- out:
-         assert(rlim >= 0 && rstart);
          /* Run post-processors. */
          switch (gly) {
          case (ESCAPE_FONT):
-                 /*
+                 if (2 == *sz) {
-                  * Pretend that the constant-width font modes are the
+                         if ('C' == **start) {
-                  * same as the regular font modes.
+                                 /*
-                  */
+                                  * Treat constant-width font modes
-                 if (2 == rlim && 'C' == *rstart)
+                                  * just like regular font modes.
-                         rstart++;
+                                  */
-                 else if (1 != rlim)
+                                 (*start)++;
+                                 (*sz)--;
+                         } else {
+                                 if ('B' == (*start)[0] && 'I' == (*start)[1])
+                                         gly = ESCAPE_FONTBI;
+                                 break;
+                         }
+                 } else if (1 != *sz)
                          break;
-                 switch (*rstart) {
+                 switch (**start) {
                  case ('3'):
                          /* FALLTHROUGH */
                  case ('B'):
-Line 410  out:
+Line 334  out:
 Line 410  out:
 Line 334  out:
                  }
                  break;
          case (ESCAPE_SPECIAL):
-                 if (1 != rlim)
+                 if (1 == *sz && 'c' == **start)
-                         break;
-                 if ('c' == *rstart)
                          gly = ESCAPE_NOSPACE;
                  break;
          default:
-Line 516  mandoc_getarg(struct mparse *parse, char **cpp, int ln
+Line 438  mandoc_getarg(struct mparse *parse, char **cpp, int ln
 Line 516  mandoc_getarg(struct mparse *parse, char **cpp, int ln
 Line 438  mandoc_getarg(struct mparse *parse, char **cpp, int ln
          pairs = 0;
          white = 0;
          for (cp = start; '\0' != *cp; cp++) {
-                 /* Move left after quoted quotes and escaped backslashes. */
+                 /*
+                  * Move the following text left
+                  * after quoted quotes and after "\\" and "\t".
+                  */
                  if (pairs)
                          cp[-pairs] = cp[0];
                  if ('\\' == cp[0]) {
-                         if ('\\' == cp[1]) {
+                         /*
-                                 /* Poor man's copy mode. */
+                          * In copy mode, translate double to single
+                          * backslashes and backslash-t to literal tabs.
+                          */
+                         switch (cp[1]) {
+                         case ('t'):
+                                 cp[0] = '\t';
+                                 /* FALLTHROUGH */
+                         case ('\\'):
                                  pairs++;
                                  cp++;
-                         } else if (0 == quoted && ' ' == cp[1])
+                                 break;
+                         case (' '):
                                  /* Skip escaped blanks. */
-                                 cp++;
+                                 if (0 == quoted)
+                                         cp++;
+                                 break;
+                         default:
+                                 break;
+                         }
                  } else if (0 == quoted) {
                          if (' ' == cp[0]) {
                                  /* Unescaped blanks end unquoted args. */
-Line 685  mandoc_eos(const char *p, size_t sz, int enclosed)
+Line 625  mandoc_eos(const char *p, size_t sz, int enclosed)
 Line 685  mandoc_eos(const char *p, size_t sz, int enclosed)
 Line 625  mandoc_eos(const char *p, size_t sz, int enclosed)
          }
          return(found && !enclosed);
- }
- /*
-  * Find out whether a line is a macro line or not.  If it is, adjust the
-  * current position and return one; if it isn't, return zero and don't
-  * change the current position.
-  */
- int
- mandoc_getcontrol(const char *cp, int *ppos)
- {
-         int             pos;
-         pos = *ppos;
-         if ('\\' == cp[pos] && '.' == cp[pos + 1])
-                 pos += 2;
-         else if ('.' == cp[pos] || '\'' == cp[pos])
-                 pos++;
-         else
-                 return(0);
-         while (' ' == cp[pos] || '\t' == cp[pos])
-                 pos++;
-         *ppos = pos;
-         return(1);
  }
  /*

CVSweb