mandoc/mandoc.c - diff

Return to mandoc.c CVS log

Up to [cvsweb.bsd.lv] / mandoc

Diff for /mandoc/mandoc.c between version 1.48 and 1.64

version 1.48, 2011/04/19 16:38:48

version 1.64, 2012/05/31 22:34:06

Line 1

/* $Id$ */

* Permission to use, copy, modify, and distribute this software for any

* purpose with or without fee is hereby granted, provided that the above

Line 23

#include <assert.h>

#include <ctype.h>

#include <errno.h>

#include <limits.h>

#include <stdlib.h>

#include <stdio.h>

#include <string.h>

Line 35

Line 37

static int a2time(time_t *, const char *, const char *);

static char *time2a(time_t);

static int numescape(const char *);

* Pass over recursive numerical expressions. The context of this

* function is important: it's only called within character-terminating

* escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial

* recursion: we don't care about what's in these blocks.

* This returns the number of characters skipped or -1 if an error

* occurs (the caller should bail).

static int

numescape(const char *start)

{

int i;

size_t sz;

const char *cp;

i = 0;

/* The expression consists of a subexpression. */

if ('\\' == start[i]) {

cp = &start[++i];

* Read past the end of the subexpression.

* Bail immediately on errors.

if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))

return(-1);

return(i + cp - &start[i]);

}

if ('(' != start[i++])

return(0);

* A parenthesised subexpression. Read until the closing

* parenthesis, making sure to handle any nested subexpressions

* that might ruin our parse.

while (')' != start[i]) {

sz = strcspn(&start[i], ")\\");

i += (int)sz;

if ('\0' == start[i])

return(-1);

else if ('\\' != start[i])

continue;

cp = &start[++i];

if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))

return(-1);

i += cp - &start[i];

}

/* Read past the terminating ')'. */

return(++i);

}

enum mandoc_esc

mandoc_escape(const char **end, const char **start, int *sz)

{

char c, term, numeric;

char c, term;

int i, lim, ssz, rlim;

int i, rlim;

const char *cp, *rstart;

enum mandoc_esc gly;

Line 107 mandoc_escape(const char **end, const char **start, in

Line 51 mandoc_escape(const char **end, const char **start, in

rstart = cp;

if (start)

*start = rstart;

i = lim = 0;

i = rlim = 0;

gly = ESCAPE_ERROR;

term = numeric = '\0';

term = '\0';

switch ((c = cp[i++])) {

Line 119 mandoc_escape(const char **end, const char **start, in

Line 63 mandoc_escape(const char **end, const char **start, in

case ('('):

gly = ESCAPE_SPECIAL;

lim = 2;

rlim = 2;

break;

case ('['):

gly = ESCAPE_SPECIAL;

* Unicode escapes are defined in groff as \[uXXXX] to

* \[u10FFFF], where the contained value must be a valid

* Unicode codepoint. Here, however, only check whether

* it's not a zero-width escape.

if ('u' == cp[i] && ']' != cp[i + 1])

gly = ESCAPE_UNICODE;

term = ']';

break;

case ('C'):

Line 133 mandoc_escape(const char **end, const char **start, in

Line 85 mandoc_escape(const char **end, const char **start, in

break;

* The \z escape is supposed to output the following

* character without advancing the cursor position.

* Since we are mostly dealing with terminal mode,

* let us just skip the next character.

case ('z'):

(*end)++;

return(ESCAPE_SKIPCHAR);

* Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where

* 'X' is the trigger. These have opaque sub-strings.

Line 151 mandoc_escape(const char **end, const char **start, in

Line 113 mandoc_escape(const char **end, const char **start, in

case ('V'):

/* FALLTHROUGH */

case ('Y'):

if (ESCAPE_ERROR == gly)

gly = ESCAPE_IGNORE;

/* FALLTHROUGH */

case ('*'):

if (ESCAPE_ERROR == gly)

gly = ESCAPE_PREDEF;

/* FALLTHROUGH */

case ('f'):

if (ESCAPE_ERROR == gly)

gly = ESCAPE_FONT;

Line 168 mandoc_escape(const char **end, const char **start, in

Line 125 mandoc_escape(const char **end, const char **start, in

switch (cp[i++]) {

case ('('):

lim = 2;

rlim = 2;

break;

case ('['):

term = ']';

break;

default:

lim = 1;

rlim = 1;

i--;

break;

}

Line 216 mandoc_escape(const char **end, const char **start, in

Line 173 mandoc_escape(const char **end, const char **start, in

case ('L'):

/* FALLTHROUGH */

case ('l'):

gly = ESCAPE_NUMBERED;

/* FALLTHROUGH */

case ('N'):

if (ESCAPE_ERROR == gly)

gly = ESCAPE_NUMBERED;

/* FALLTHROUGH */

case ('S'):

/* FALLTHROUGH */

case ('v'):

Line 232 mandoc_escape(const char **end, const char **start, in

Line 186 mandoc_escape(const char **end, const char **start, in

gly = ESCAPE_IGNORE;

if ('\'' != cp[i++])

return(ESCAPE_ERROR);

term = numeric = '\'';

term = '\'';

break;

* Special handling for the numbered character escape.

* XXX Do any other escapes need similar handling?

case ('N'):

if ('\0' == cp[i])

return(ESCAPE_ERROR);

*end = &cp[++i];

if (isdigit((unsigned char)cp[i-1]))

return(ESCAPE_IGNORE);

while (isdigit((unsigned char)**end))

(*end)++;

if (start)

*start = &cp[i];

if (sz)

*sz = *end - &cp[i];

if ('\0' != **end)

(*end)++;

return(ESCAPE_NUMBERED);

* Sizes get a special category of their own.

Line 252 mandoc_escape(const char **end, const char **start, in

Line 226 mandoc_escape(const char **end, const char **start, in

switch (cp[i++]) {

case ('('):

lim = 2;

rlim = 2;

break;

case ('['):

term = numeric = ']';

term = ']';

break;

case ('\''):

term = numeric = '\'';

term = '\'';

break;

default:

lim = 1;

rlim = 1;

i--;

break;

}

Line 278 mandoc_escape(const char **end, const char **start, in

Line 252 mandoc_escape(const char **end, const char **start, in

default:

gly = ESCAPE_SPECIAL;

lim = 1;

rlim = 1;

i--;

break;

}

assert(ESCAPE_ERROR != gly);

rstart = &cp[i];

*end = rstart = &cp[i];

if (start)

*start = rstart;

* If a terminating block has been specified, we need to

* Read up to the terminating character,

* handle the case of recursion, which could have their

* paying attention to nested escapes.

* own terminating blocks that mess up our parse. This, by the

* way, means that the "start" and "size" values will be

* effectively meaningless.

ssz = 0;

if (numeric && -1 == (ssz = numescape(&cp[i])))

return(ESCAPE_ERROR);

i += ssz;

rlim = -1;

* We have a character terminator. Try to read up to that

* character. If we can't (i.e., we hit the nil), then return

* an error; if we can, calculate our length, read past the

* terminating character, and exit.

if ('\0' != term) {

*end = strchr(&cp[i], term);

while (**end != term) {

if ('\0' == *end)

switch (**end) {

case ('\0'):

return(ESCAPE_ERROR);

case ('\\'):

(*end)++;

if (ESCAPE_ERROR ==

mandoc_escape(end, NULL, NULL))

return(ESCAPE_ERROR);

break;

default:

(*end)++;

break;

}

rlim = (*end)++ - rstart;

} else {

assert(rlim > 0);

if ((size_t)rlim > strlen(rstart))

return(ESCAPE_ERROR);

*end += rlim;

rlim = *end - &cp[i];

if (sz)

*sz = rlim;

(*end)++;

goto out;

}

assert(lim > 0);

* We have a numeric limit. If the string is shorter than that,

* stop and return an error. Else adjust our endpoint, length,

* and return the current glyph.

if ((size_t)lim > strlen(&cp[i]))

return(ESCAPE_ERROR);

rlim = lim;

if (sz)

*sz = rlim;

*end = &cp[i] + lim;

out:

assert(rlim >= 0 && rstart);

/* Run post-processors. */

switch (gly) {

case (ESCAPE_FONT):

if (1 != rlim)

* Pretend that the constant-width font modes are the

* same as the regular font modes.

if (2 == rlim && 'C' == *rstart)

rstart++;

else if (1 != rlim)

break;

switch (*rstart) {

case ('3'):

/* FALLTHROUGH */

Line 426 mandoc_realloc(void *ptr, size_t size)

Line 384 mandoc_realloc(void *ptr, size_t size)

return(ptr);

}

/*
 * Copy exactly sz bytes from ptr into a fresh buffer of sz + 1 bytes
 * obtained from mandoc_malloc() and NUL-terminate it.
 *
 * The copy does not stop at a NUL inside ptr, so ptr must have at
 * least sz readable bytes.
 *
 * Returns the new buffer.
 * NOTE(review): presumably mandoc_malloc() never returns NULL and the
 * caller is expected to free the result - confirm against its
 * definition.
 */
char *
mandoc_strndup(const char *ptr, size_t sz)
{
	char		*p;

	p = mandoc_malloc(sz + 1);
	memcpy(p, ptr, sz);
	/*
	 * Index with the size_t itself: narrowing it to int would
	 * produce a wrong (possibly negative) index for large sizes.
	 */
	p[sz] = '\0';

	return(p);
}

char *

mandoc_strdup(const char *ptr)

{

Line 526 a2time(time_t *t, const char *fmt, const char *p)

Line 494 a2time(time_t *t, const char *fmt, const char *p)

memset(&tm, 0, sizeof(struct tm));

pp = NULL;

#ifdef HAVE_STRPTIME

pp = strptime(p, fmt, &tm);

#endif

if (NULL != pp && '\0' == *pp) {

*t = mktime(&tm);

return(1);

Line 538 a2time(time_t *t, const char *fmt, const char *p)

Line 509 a2time(time_t *t, const char *fmt, const char *p)

static char *

time2a(time_t t)

{

struct tm tm;

struct tm *tm;

char *buf, *p;

size_t ssz;

int isz;

localtime_r(&t, &tm);

tm = localtime(&t);

* Reserve space:

Line 553 time2a(time_t t)

Line 524 time2a(time_t t)

p = buf = mandoc_malloc(10 + 4 + 4 + 1);

if (0 == (ssz = strftime(p, 10 + 1, "%B ", &tm)))

if (0 == (ssz = strftime(p, 10 + 1, "%B ", tm)))

goto fail;

p += (int)ssz;

if (-1 == (isz = snprintf(p, 4 + 1, "%d, ", tm.tm_mday)))

if (-1 == (isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday)))

goto fail;

p += isz;

if (0 == strftime(p, 4 + 1, "%Y", &tm))

if (0 == strftime(p, 4 + 1, "%Y", tm))

goto fail;

return(buf);

Line 581 mandoc_normdate(struct mparse *parse, char *in, int ln

Line 552 mandoc_normdate(struct mparse *parse, char *in, int ln

mandoc_msg(MANDOCERR_NODATE, parse, ln, pos, NULL);

time(&t);

}

else if (a2time(&t, "%Y-%m-%d", in))

t = 0;

else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) &&

!a2time(&t, "%b %d, %Y", in) &&

!a2time(&t, "%b %d, %Y", in)) {

!a2time(&t, "%Y-%m-%d", in)) {

mandoc_msg(MANDOCERR_BADDATE, parse, ln, pos, NULL);

t = 0;

}

Line 603 mandoc_eos(const char *p, size_t sz, int enclosed)

Line 575 mandoc_eos(const char *p, size_t sz, int enclosed)

* End-of-sentence recognition must include situations where

* some symbols, such as `)', allow prior EOS punctuation to

* propogate outward.

* propagate outward.

found = 0;

Line 634 mandoc_eos(const char *p, size_t sz, int enclosed)

Line 606 mandoc_eos(const char *p, size_t sz, int enclosed)

return(found && !enclosed);

}

/*
 * Decide whether the character at "c" (expected to be a hyphen inside
 * the buffer beginning at "start") is an acceptable break point.
 *
 * Returns 1 if a break is allowed there and 0 otherwise.
 */
int
mandoc_hyph(const char *start, const char *c)
{

	/*
	 * Choose whether to break at a hyphenated character.  We only
	 * do this if it's free-standing within a word.
	 */

	/* Skip first/last character of buffer. */
	if (c == start || '\0' == *(c + 1))
		return(0);
	/* Skip first/last character of word. */
	if ('\t' == *(c + 1) || '\t' == *(c - 1))
		return(0);
	if (' ' == *(c + 1) || ' ' == *(c - 1))
		return(0);
	/* Skip double invocations. */
	if ('-' == *(c + 1) || '-' == *(c - 1))
		return(0);
	/* Skip escapes. */
	if ('\\' == *(c - 1))
		return(0);

	return(1);
}

* Find out whether a line is a macro line or not. If it is, adjust the

* current position and return one; if it isn't, return zero and don't

Line 685 mandoc_getcontrol(const char *cp, int *ppos)

Line 630 mandoc_getcontrol(const char *cp, int *ppos)

*ppos = pos;

return(1);

}

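* Convert the first sz bytes of a string to an int in the given base.

* If the string is empty, longer than 31 bytes, or not numeric in its entirety, return -1. Values outside the range of int are clamped to INT_MIN or INT_MAX. Negative values are not rejected here, so callers that need a non-negative result must check for it.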

/*
 * Parse the first sz bytes of p as an integer in the given base.
 *
 * The bytes are copied into a local NUL-terminated buffer so that
 * strtol() can be used on input that is not itself terminated.
 *
 * Returns the parsed value, clamped to [INT_MIN, INT_MAX], or -1 if
 * sz exceeds 31, the input is empty, or strtol() stops before the end
 * of the copied text.
 */
int
mandoc_strntoi(const char *p, size_t sz, int base)
{
	char		 digits[32];
	char		*tail;
	long		 val;

	/* One byte of the buffer is reserved for the terminator. */
	if (sz >= sizeof(digits))
		return(-1);

	memcpy(digits, p, sz);
	digits[sz] = '\0';

	/* errno is cleared before the call; it is not inspected here. */
	errno = 0;
	val = strtol(digits, &tail, base);

	/* Reject empty input and anything strtol() left unconsumed. */
	if ('\0' == digits[0] || '\0' != *tail)
		return(-1);

	/* Clamp results that do not fit in an int. */
	if (val > INT_MAX)
		return(INT_MAX);
	if (val < INT_MIN)
		return(INT_MIN);

	return((int)val);
}

CVSweb