mandoc/mandoc.c - diff

Return to mandoc.c CVS log

Up to [cvsweb.bsd.lv] / mandoc

Diff for /mandoc/mandoc.c between version 1.54 and 1.65

version 1.54, 2011/07/21 15:21:13

version 1.65, 2012/05/31 22:38:16

Line 1

/* $Id$ */

* Permission to use, copy, modify, and distribute this software for any

* purpose with or without fee is hereby granted, provided that the above

Line 37

static int a2time(time_t *, const char *, const char *);

static char *time2a(time_t);

static int numescape(const char *);

* Pass over recursive numerical expressions. The context of this

enum mandoc_esc

* function is important: it's only called within character-terminating

mandoc_escape(const char **end, const char **start, int *sz)

* escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial

* recursion: we don't care about what's in these blocks.

* This returns the number of characters skipped or -1 if an error

* occurs (the caller should bail).

static int

numescape(const char *start)

{

int i;

const char *local_start;

size_t sz;

int local_sz;

const char *cp;

char term;

enum mandoc_esc gly;

i = 0;

* When the caller doesn't provide return storage,

* use local storage.

/* The expression consists of a subexpression. */

if (NULL == start)

start = &local_start;

if (NULL == sz)

sz = &local_sz;

if ('\\' == start[i]) {

cp = &start[++i];

* Read past the end of the subexpression.

* Bail immediately on errors.

if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))

return(-1);

return(i + cp - &start[i]);

}

if ('(' != start[i++])

return(0);

* A parenthesised subexpression. Read until the closing

* Beyond the backslash, at least one input character

* parenthesis, making sure to handle any nested subexpressions

* is part of the escape sequence. With one exception

* that might ruin our parse.

* (see below), that character won't be returned.

while (')' != start[i]) {

sz = strcspn(&start[i], ")\\");

i += (int)sz;

if ('\0' == start[i])

return(-1);

else if ('\\' != start[i])

continue;

cp = &start[++i];

if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))

return(-1);

i += cp - &start[i];

}

/* Read past the terminating ')'. */

return(++i);

}

enum mandoc_esc

mandoc_escape(const char **end, const char **start, int *sz)

{

char c, term, numeric;

int i, lim, ssz, rlim;

const char *cp, *rstart;

enum mandoc_esc gly;

cp = *end;

rstart = cp;

if (start)

*start = rstart;

i = lim = 0;

gly = ESCAPE_ERROR;

term = numeric = '\0';

*start = ++*end;

*sz = 0;

term = '\0';

switch ((c = cp[i++])) {

switch ((*start)[-1]) {

* First the glyphs. There are several different forms of

* these, but each eventually returns a substring of the glyph

Line 121 mandoc_escape(const char **end, const char **start, in

Line 76 mandoc_escape(const char **end, const char **start, in

case ('('):

gly = ESCAPE_SPECIAL;

lim = 2;

*sz = 2;

break;

case ('['):

gly = ESCAPE_SPECIAL;

Line 131 mandoc_escape(const char **end, const char **start, in

Line 86 mandoc_escape(const char **end, const char **start, in

* Unicode codepoint. Here, however, only check whether

* it's not a zero-width escape.

if ('u' == cp[i] && ']' != cp[i + 1])

if ('u' == (*start)[0] && ']' != (*start)[1])

gly = ESCAPE_UNICODE;

term = ']';

break;

case ('C'):

if ('\'' != cp[i])

if ('\'' != **start)

return(ESCAPE_ERROR);

gly = ESCAPE_SPECIAL;

*start = ++*end;

term = '\'';

break;

* The \z escape is supposed to output the following

* character without advancing the cursor position.

* Since we are mostly dealing with terminal mode,

* let us just skip the next character.

case ('z'):

return(ESCAPE_SKIPCHAR);

* Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where

* 'X' is the trigger. These have opaque sub-strings.

Line 161 mandoc_escape(const char **end, const char **start, in

Line 126 mandoc_escape(const char **end, const char **start, in

case ('V'):

/* FALLTHROUGH */

case ('Y'):

if (ESCAPE_ERROR == gly)

gly = ESCAPE_IGNORE;

/* FALLTHROUGH */

case ('f'):

if (ESCAPE_ERROR == gly)

gly = ESCAPE_FONT;

switch (**start) {

rstart= &cp[i];

if (start)

*start = rstart;

switch (cp[i++]) {

case ('('):

lim = 2;

*start = ++*end;

*sz = 2;

break;

case ('['):

*start = ++*end;

term = ']';

break;

default:

lim = 1;

*sz = 1;

i--;

break;

}

break;

Line 203 mandoc_escape(const char **end, const char **start, in

Line 163 mandoc_escape(const char **end, const char **start, in

case ('X'):

/* FALLTHROUGH */

case ('Z'):

if ('\'' != cp[i++])

if ('\'' != **start)

return(ESCAPE_ERROR);

gly = ESCAPE_IGNORE;

*start = ++*end;

term = '\'';

break;

Line 222 mandoc_escape(const char **end, const char **start, in

Line 183 mandoc_escape(const char **end, const char **start, in

case ('L'):

/* FALLTHROUGH */

case ('l'):

gly = ESCAPE_NUMBERED;

/* FALLTHROUGH */

case ('N'):

if (ESCAPE_ERROR == gly)

gly = ESCAPE_NUMBERED;

/* FALLTHROUGH */

case ('S'):

/* FALLTHROUGH */

case ('v'):

Line 234 mandoc_escape(const char **end, const char **start, in

Line 192 mandoc_escape(const char **end, const char **start, in

case ('w'):

/* FALLTHROUGH */

case ('x'):

if ('\'' != **start)

return(ESCAPE_ERROR);

if (ESCAPE_ERROR == gly)

gly = ESCAPE_IGNORE;

if ('\'' != cp[i++])

*start = ++*end;

return(ESCAPE_ERROR);

term = '\'';

term = numeric = '\'';

break;

* Special handling for the numbered character escape.

* XXX Do any other escapes need similar handling?

case ('N'):

if ('\0' == **start)

return(ESCAPE_ERROR);

(*end)++;

if (isdigit((unsigned char)**start)) {

*sz = 1;

return(ESCAPE_IGNORE);

}

(*start)++;

while (isdigit((unsigned char)**end))

(*end)++;

*sz = *end - *start;

if ('\0' != **end)

(*end)++;

return(ESCAPE_NUMBERED);

* Sizes get a special category of their own.

case ('s'):

gly = ESCAPE_IGNORE;

rstart = &cp[i];

if (start)

*start = rstart;

/* See +/- counts as a sign. */

c = cp[i];

if ('+' == **end || '-' == **end || ASCII_HYPH == **end)

if ('+' == c || '-' == c || ASCII_HYPH == c)

(*end)++;

++i;

switch (cp[i++]) {

switch (**end) {

case ('('):

lim = 2;

*start = ++*end;

*sz = 2;

break;

case ('['):

term = numeric = ']';

*start = ++*end;

term = ']';

break;

case ('\''):

term = numeric = '\'';

*start = ++*end;

term = '\'';

break;

default:

lim = 1;

*sz = 1;

i--;

break;

}

/* See +/- counts as a sign. */

c = cp[i];

if ('+' == c || '-' == c || ASCII_HYPH == c)

++i;

break;

* Anything else is assumed to be a glyph.

* In this case, pass back the character after the backslash.

default:

gly = ESCAPE_SPECIAL;

lim = 1;

*start = --*end;

i--;

*sz = 1;

break;

}

assert(ESCAPE_ERROR != gly);

rstart = &cp[i];

if (start)

*start = rstart;

* If a terminating block has been specified, we need to

* Read up to the terminating character,

* handle the case of recursion, which could have their

* paying attention to nested escapes.

* own terminating blocks that mess up our parse. This, by the

* way, means that the "start" and "size" values will be

* effectively meaningless.

ssz = 0;

if (numeric && -1 == (ssz = numescape(&cp[i])))

return(ESCAPE_ERROR);

i += ssz;

rlim = -1;

* We have a character terminator. Try to read up to that

* character. If we can't (i.e., we hit the nil), then return

* an error; if we can, calculate our length, read past the

* terminating character, and exit.

if ('\0' != term) {

*end = strchr(&cp[i], term);

while (**end != term) {

if ('\0' == *end)

switch (**end) {

case ('\0'):

return(ESCAPE_ERROR);

case ('\\'):

(*end)++;

if (ESCAPE_ERROR ==

mandoc_escape(end, NULL, NULL))

return(ESCAPE_ERROR);

break;

default:

(*end)++;

break;

}

*sz = (*end)++ - *start;

} else {

assert(*sz > 0);

if ((size_t)*sz > strlen(*start))

return(ESCAPE_ERROR);

*end += *sz;

rlim = *end - &cp[i];

if (sz)

*sz = rlim;

(*end)++;

goto out;

}

assert(lim > 0);

* We have a numeric limit. If the string is shorter than that,

* stop and return an error. Else adjust our endpoint, length,

* and return the current glyph.

if ((size_t)lim > strlen(&cp[i]))

return(ESCAPE_ERROR);

rlim = lim;

if (sz)

*sz = rlim;

*end = &cp[i] + lim;

out:

assert(rlim >= 0 && rstart);

/* Run post-processors. */

switch (gly) {

case (ESCAPE_FONT):

if (1 != rlim)

* Pretend that the constant-width font modes are the

* same as the regular font modes.

if (2 == *sz && 'C' == **start) {

(*start)++;

(*sz)--;

} else if (1 != *sz)

break;

switch (*rstart) {

switch (**start) {

case ('3'):

/* FALLTHROUGH */

case ('B'):

Line 377 out:

Line 328 out:

}

break;

case (ESCAPE_SPECIAL):

if (1 != rlim)

if (1 == *sz && 'c' == **start)

break;

if ('c' == *rstart)

gly = ESCAPE_NOSPACE;

break;

default:

Line 432 mandoc_realloc(void *ptr, size_t size)

Line 381 mandoc_realloc(void *ptr, size_t size)

return(ptr);

}

/*
 * Copy the first sz bytes of ptr into a newly allocated buffer of
 * sz + 1 bytes and NUL-terminate it.
 *
 * Exactly sz bytes are copied with memcpy(), so ptr must reference at
 * least sz readable bytes; an embedded NUL does not stop the copy.
 *
 * Returns the new buffer.  The result of mandoc_malloc() is used
 * without a NULL check here.
 * NOTE(review): presumably mandoc_malloc() never returns NULL and the
 * caller is responsible for freeing the result -- confirm.
 */
char *
mandoc_strndup(const char *ptr, size_t sz)
{
	char	*p;

	p = mandoc_malloc(sz + 1);
	memcpy(p, ptr, sz);
	/*
	 * Index with the size_t value directly: narrowing it to int
	 * could yield a negative or otherwise wrong offset for large sz.
	 */
	p[sz] = '\0';
	return(p);
}

char *

mandoc_strdup(const char *ptr)

{

Line 532 a2time(time_t *t, const char *fmt, const char *p)

Line 491 a2time(time_t *t, const char *fmt, const char *p)

memset(&tm, 0, sizeof(struct tm));

pp = NULL;

#ifdef HAVE_STRPTIME

pp = strptime(p, fmt, &tm);

#endif

if (NULL != pp && '\0' == *pp) {

*t = mktime(&tm);

return(1);

Line 544 a2time(time_t *t, const char *fmt, const char *p)

Line 506 a2time(time_t *t, const char *fmt, const char *p)

static char *

time2a(time_t t)

{

struct tm tm;

struct tm *tm;

char *buf, *p;

size_t ssz;

int isz;

localtime_r(&t, &tm);

tm = localtime(&t);

* Reserve space:

Line 559 time2a(time_t t)

Line 521 time2a(time_t t)

p = buf = mandoc_malloc(10 + 4 + 4 + 1);

if (0 == (ssz = strftime(p, 10 + 1, "%B ", &tm)))

if (0 == (ssz = strftime(p, 10 + 1, "%B ", tm)))

goto fail;

p += (int)ssz;

if (-1 == (isz = snprintf(p, 4 + 1, "%d, ", tm.tm_mday)))

if (-1 == (isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday)))

goto fail;

p += isz;

if (0 == strftime(p, 4 + 1, "%Y", &tm))

if (0 == strftime(p, 4 + 1, "%Y", tm))

goto fail;

return(buf);

Line 587 mandoc_normdate(struct mparse *parse, char *in, int ln

Line 549 mandoc_normdate(struct mparse *parse, char *in, int ln

mandoc_msg(MANDOCERR_NODATE, parse, ln, pos, NULL);

time(&t);

}

else if (a2time(&t, "%Y-%m-%d", in))

t = 0;

else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) &&

!a2time(&t, "%b %d, %Y", in) &&

!a2time(&t, "%b %d, %Y", in)) {

!a2time(&t, "%Y-%m-%d", in)) {

mandoc_msg(MANDOCERR_BADDATE, parse, ln, pos, NULL);

t = 0;

}

Line 638 mandoc_eos(const char *p, size_t sz, int enclosed)

Line 601 mandoc_eos(const char *p, size_t sz, int enclosed)

}

return(found && !enclosed);

}

/*
 * Decide whether the character at position c may be used as a
 * hyphenation break point.
 *
 * start: first character of the NUL-terminated buffer containing c.
 * c:     position inside that buffer to examine.  Only the neighbours
 *        of c are inspected; the character at c itself is not checked.
 *
 * Returns 1 if a break is acceptable at c, 0 otherwise.
 */
int
mandoc_hyph(const char *start, const char *c)
{

	/*
	 * Choose whether to break at a hyphenated character.  We only
	 * do this if it's free-standing within a word.
	 */

	/* Skip first/last character of buffer. */
	if (c == start || '\0' == *(c + 1))
		return(0);
	/* Skip first/last character of word. */
	if ('\t' == *(c + 1) || '\t' == *(c - 1))
		return(0);
	if (' ' == *(c + 1) || ' ' == *(c - 1))
		return(0);
	/* Skip double invocations. */
	if ('-' == *(c + 1) || '-' == *(c - 1))
		return(0);
	/* Skip escapes. */
	if ('\\' == *(c - 1))
		return(0);

	return(1);
}

CVSweb