Return to mandoc.c CVS log | Up to [cvsweb.bsd.lv] / mandoc |
version 1.38, 2011/03/15 03:03:54 | version 1.53, 2011/05/24 21:31:23 | ||
---|---|---|---|
|
|
||
#include <assert.h> | #include <assert.h> | ||
#include <ctype.h> | #include <ctype.h> | ||
#include <errno.h> | |||
#include <limits.h> | |||
#include <stdlib.h> | #include <stdlib.h> | ||
#include <stdio.h> | #include <stdio.h> | ||
#include <string.h> | #include <string.h> | ||
|
|
||
static int a2time(time_t *, const char *, const char *); | static int a2time(time_t *, const char *, const char *); | ||
static char *time2a(time_t); | static char *time2a(time_t); | ||
static int numescape(const char *); | |||
int | /* | ||
mandoc_special(char *p) | * Pass over recursive numerical expressions. The context of this | ||
* function is important: it's only called within character-terminating | |||
* escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial | |||
* recursion: we don't care about what's in these blocks. | |||
* This returns the number of characters skipped or -1 if an error | |||
* occurs (the caller should bail). | |||
*/ | |||
static int | |||
numescape(const char *start) | |||
{ | { | ||
int len, i; | int i; | ||
char term; | size_t sz; | ||
char *sv; | const char *cp; | ||
len = 0; | |||
term = '\0'; | |||
sv = p; | |||
assert('\\' == *p); | i = 0; | ||
p++; | |||
switch (*p++) { | /* The expression consists of a subexpression. */ | ||
#if 0 | |||
case ('Z'): | if ('\\' == start[i]) { | ||
cp = &start[++i]; | |||
/* | |||
* Read past the end of the subexpression. | |||
* Bail immediately on errors. | |||
*/ | |||
if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL)) | |||
return(-1); | |||
return(i + cp - &start[i]); | |||
} | |||
if ('(' != start[i++]) | |||
return(0); | |||
/* | |||
* A parenthesised subexpression. Read until the closing | |||
* parenthesis, making sure to handle any nested subexpressions | |||
* that might ruin our parse. | |||
*/ | |||
while (')' != start[i]) { | |||
sz = strcspn(&start[i], ")\\"); | |||
i += (int)sz; | |||
if ('\0' == start[i]) | |||
return(-1); | |||
else if ('\\' != start[i]) | |||
continue; | |||
cp = &start[++i]; | |||
if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL)) | |||
return(-1); | |||
i += cp - &start[i]; | |||
} | |||
/* Read past the terminating ')'. */ | |||
return(++i); | |||
} | |||
enum mandoc_esc | |||
mandoc_escape(const char **end, const char **start, int *sz) | |||
{ | |||
char c, term, numeric; | |||
int i, lim, ssz, rlim; | |||
const char *cp, *rstart; | |||
enum mandoc_esc gly; | |||
cp = *end; | |||
rstart = cp; | |||
if (start) | |||
*start = rstart; | |||
i = lim = 0; | |||
gly = ESCAPE_ERROR; | |||
term = numeric = '\0'; | |||
switch ((c = cp[i++])) { | |||
/* | |||
* First the glyphs. There are several different forms of | |||
* these, but each eventually returns a substring of the glyph | |||
* name. | |||
*/ | |||
case ('('): | |||
gly = ESCAPE_SPECIAL; | |||
lim = 2; | |||
break; | |||
case ('['): | |||
gly = ESCAPE_SPECIAL; | |||
/* | |||
* Unicode escapes are defined in groff as \[uXXXX] to | |||
* \[u10FFFF], where the contained value must be a valid | |||
* Unicode codepoint. Here, however, only check whether | |||
* it's not a zero-width escape. | |||
*/ | |||
if ('u' == cp[i] && ']' != cp[i + 1]) | |||
gly = ESCAPE_UNICODE; | |||
term = ']'; | |||
break; | |||
case ('C'): | |||
if ('\'' != cp[i]) | |||
return(ESCAPE_ERROR); | |||
gly = ESCAPE_SPECIAL; | |||
term = '\''; | |||
break; | |||
/* | |||
* Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where | |||
* 'X' is the trigger. These have opaque sub-strings. | |||
*/ | |||
case ('F'): | |||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('X'): | case ('g'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('x'): | case ('k'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('S'): | case ('M'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('R'): | case ('m'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('N'): | case ('n'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('l'): | case ('V'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('L'): | case ('Y'): | ||
if (ESCAPE_ERROR == gly) | |||
gly = ESCAPE_IGNORE; | |||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('H'): | case ('f'): | ||
if (ESCAPE_ERROR == gly) | |||
gly = ESCAPE_FONT; | |||
rstart= &cp[i]; | |||
if (start) | |||
*start = rstart; | |||
switch (cp[i++]) { | |||
case ('('): | |||
lim = 2; | |||
break; | |||
case ('['): | |||
term = ']'; | |||
break; | |||
default: | |||
lim = 1; | |||
i--; | |||
break; | |||
} | |||
break; | |||
/* | |||
* These escapes are of the form \X'Y', where 'X' is the trigger | |||
* and 'Y' is any string. These have opaque sub-strings. | |||
*/ | |||
case ('A'): | |||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('h'): | case ('b'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('D'): | case ('D'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('C'): | case ('o'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('b'): | case ('R'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('B'): | case ('X'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('a'): | case ('Z'): | ||
/* FALLTHROUGH */ | if ('\'' != cp[i++]) | ||
case ('A'): | return(ESCAPE_ERROR); | ||
if (*p++ != '\'') | gly = ESCAPE_IGNORE; | ||
return(0); | |||
term = '\''; | term = '\''; | ||
break; | break; | ||
#endif | |||
/* | |||
* These escapes are of the form \X'N', where 'X' is the trigger | |||
* and 'N' resolves to a numerical expression. | |||
*/ | |||
case ('B'): | |||
/* FALLTHROUGH */ | |||
case ('h'): | case ('h'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('H'): | |||
/* FALLTHROUGH */ | |||
case ('L'): | |||
/* FALLTHROUGH */ | |||
case ('l'): | |||
/* FALLTHROUGH */ | |||
case ('N'): | |||
if (ESCAPE_ERROR == gly) | |||
gly = ESCAPE_NUMBERED; | |||
/* FALLTHROUGH */ | |||
case ('S'): | |||
/* FALLTHROUGH */ | |||
case ('v'): | case ('v'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('w'): | |||
/* FALLTHROUGH */ | |||
case ('x'): | |||
if (ESCAPE_ERROR == gly) | |||
gly = ESCAPE_IGNORE; | |||
if ('\'' != cp[i++]) | |||
return(ESCAPE_ERROR); | |||
term = numeric = '\''; | |||
break; | |||
/* | |||
* Sizes get a special category of their own. | |||
*/ | |||
case ('s'): | case ('s'): | ||
if (ASCII_HYPH == *p) | gly = ESCAPE_IGNORE; | ||
*p = '-'; | |||
i = 0; | rstart = &cp[i]; | ||
if ('+' == *p || '-' == *p) { | if (start) | ||
p++; | *start = rstart; | ||
i = 1; | |||
} | |||
switch (*p++) { | /* See whether +/- counts as a sign. */ | ||
c = cp[i]; | |||
if ('+' == c || '-' == c || ASCII_HYPH == c) | |||
++i; | |||
switch (cp[i++]) { | |||
case ('('): | case ('('): | ||
len = 2; | lim = 2; | ||
break; | break; | ||
case ('['): | case ('['): | ||
term = ']'; | term = numeric = ']'; | ||
break; | break; | ||
case ('\''): | case ('\''): | ||
term = '\''; | term = numeric = '\''; | ||
break; | break; | ||
case ('0'): | |||
i = 1; | |||
/* FALLTHROUGH */ | |||
default: | default: | ||
len = 1; | lim = 1; | ||
p--; | i--; | ||
break; | break; | ||
} | } | ||
if (ASCII_HYPH == *p) | /* See whether +/- counts as a sign. */ | ||
*p = '-'; | c = cp[i]; | ||
if ('+' == *p || '-' == *p) { | if ('+' == c || '-' == c || ASCII_HYPH == c) | ||
if (i) | ++i; | ||
return(0); | |||
p++; | |||
} | |||
/* Handle embedded numerical subexp or escape. */ | |||
if ('(' == *p) { | break; | ||
while (*p && ')' != *p) | |||
if ('\\' == *p++) { | |||
i = mandoc_special(--p); | |||
if (0 == i) | |||
return(0); | |||
p += i; | |||
} | |||
if (')' == *p++) | /* | ||
break; | * Anything else is assumed to be a glyph. | ||
*/ | |||
default: | |||
gly = ESCAPE_SPECIAL; | |||
lim = 1; | |||
i--; | |||
break; | |||
} | |||
return(0); | assert(ESCAPE_ERROR != gly); | ||
} else if ('\\' == *p) { | |||
if (0 == (i = mandoc_special(p))) | |||
return(0); | |||
p += i; | |||
} | |||
break; | rstart = &cp[i]; | ||
#if 0 | if (start) | ||
case ('Y'): | *start = rstart; | ||
/* FALLTHROUGH */ | |||
case ('V'): | /* | ||
/* FALLTHROUGH */ | * If a terminating block has been specified, we need to | ||
case ('$'): | * handle the case of recursion, which could have their | ||
/* FALLTHROUGH */ | * own terminating blocks that mess up our parse. This, by the | ||
case ('n'): | * way, means that the "start" and "size" values will be | ||
/* FALLTHROUGH */ | * effectively meaningless. | ||
#endif | */ | ||
case ('k'): | |||
/* FALLTHROUGH */ | ssz = 0; | ||
case ('M'): | if (numeric && -1 == (ssz = numescape(&cp[i]))) | ||
/* FALLTHROUGH */ | return(ESCAPE_ERROR); | ||
case ('m'): | |||
/* FALLTHROUGH */ | i += ssz; | ||
case ('f'): | rlim = -1; | ||
/* FALLTHROUGH */ | |||
case ('F'): | /* | ||
/* FALLTHROUGH */ | * We have a character terminator. Try to read up to that | ||
case ('*'): | * character. If we can't (i.e., we hit the nil), then return | ||
switch (*p++) { | * an error; if we can, calculate our length, read past the | ||
case ('('): | * terminating character, and exit. | ||
len = 2; | */ | ||
if ('\0' != term) { | |||
*end = strchr(&cp[i], term); | |||
if ('\0' == *end) | |||
return(ESCAPE_ERROR); | |||
rlim = *end - &cp[i]; | |||
if (sz) | |||
*sz = rlim; | |||
(*end)++; | |||
goto out; | |||
} | |||
assert(lim > 0); | |||
/* | |||
* We have a numeric limit. If the string is shorter than that, | |||
* stop and return an error. Else adjust our endpoint, length, | |||
* and return the current glyph. | |||
*/ | |||
if ((size_t)lim > strlen(&cp[i])) | |||
return(ESCAPE_ERROR); | |||
rlim = lim; | |||
if (sz) | |||
*sz = rlim; | |||
*end = &cp[i] + lim; | |||
out: | |||
assert(rlim >= 0 && rstart); | |||
/* Run post-processors. */ | |||
switch (gly) { | |||
case (ESCAPE_FONT): | |||
if (1 != rlim) | |||
break; | break; | ||
case ('['): | switch (*rstart) { | ||
term = ']'; | case ('3'): | ||
/* FALLTHROUGH */ | |||
case ('B'): | |||
gly = ESCAPE_FONTBOLD; | |||
break; | break; | ||
default: | case ('2'): | ||
len = 1; | /* FALLTHROUGH */ | ||
p--; | case ('I'): | ||
gly = ESCAPE_FONTITALIC; | |||
break; | break; | ||
case ('P'): | |||
gly = ESCAPE_FONTPREV; | |||
break; | |||
case ('1'): | |||
/* FALLTHROUGH */ | |||
case ('R'): | |||
gly = ESCAPE_FONTROMAN; | |||
break; | |||
} | } | ||
break; | break; | ||
case ('('): | case (ESCAPE_SPECIAL): | ||
len = 2; | if (1 != rlim) | ||
break; | |||
case ('['): | |||
term = ']'; | |||
break; | |||
case ('z'): | |||
len = 1; | |||
if ('\\' == *p) { | |||
if (0 == (i = mandoc_special(p))) | |||
return(0); | |||
p += i; | |||
return(*p ? (int)(p - sv) : 0); | |||
} | |||
break; | |||
case ('o'): | |||
/* FALLTHROUGH */ | |||
case ('w'): | |||
if ('\'' == *p++) { | |||
term = '\''; | |||
break; | break; | ||
} | if ('c' == *rstart) | ||
/* FALLTHROUGH */ | gly = ESCAPE_NOSPACE; | ||
break; | |||
default: | default: | ||
len = 1; | |||
p--; | |||
break; | break; | ||
} | } | ||
if (term) { | return(gly); | ||
for ( ; *p && term != *p; p++) | |||
if (ASCII_HYPH == *p) | |||
*p = '-'; | |||
return(*p ? (int)(p - sv) : 0); | |||
} | |||
for (i = 0; *p && i < len; i++, p++) | |||
if (ASCII_HYPH == *p) | |||
*p = '-'; | |||
return(i == len ? (int)(p - sv) : 0); | |||
} | } | ||
void * | void * | ||
mandoc_calloc(size_t num, size_t size) | mandoc_calloc(size_t num, size_t size) | ||
{ | { | ||
|
|
||
* or to the null byte terminating the argument line. | * or to the null byte terminating the argument line. | ||
*/ | */ | ||
char * | char * | ||
mandoc_getarg(char **cpp, mandocmsg msg, void *data, int ln, int *pos) | mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos) | ||
{ | { | ||
char *start, *cp; | char *start, *cp; | ||
int quoted, pairs, white; | int quoted, pairs, white; | ||
/* Quoting can only start with a new word. */ | /* Quoting can only start with a new word. */ | ||
start = *cpp; | start = *cpp; | ||
quoted = 0; | |||
if ('"' == *start) { | if ('"' == *start) { | ||
quoted = 1; | quoted = 1; | ||
start++; | start++; | ||
} else | } | ||
quoted = 0; | |||
pairs = 0; | pairs = 0; | ||
white = 0; | white = 0; | ||
|
|
||
} | } | ||
/* Quoted argument without a closing quote. */ | /* Quoted argument without a closing quote. */ | ||
if (1 == quoted && msg) | if (1 == quoted) | ||
(*msg)(MANDOCERR_BADQUOTE, data, ln, *pos, NULL); | mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL); | ||
/* Null-terminate this argument and move to the next one. */ | /* Null-terminate this argument and move to the next one. */ | ||
if (pairs) | if (pairs) | ||
|
|
||
while (' ' == *cp) | while (' ' == *cp) | ||
cp++; | cp++; | ||
} | } | ||
*pos += (cp - start) + (quoted ? 1 : 0); | *pos += (int)(cp - start) + (quoted ? 1 : 0); | ||
*cpp = cp; | *cpp = cp; | ||
if ('\0' == *cp && msg && (white || ' ' == cp[-1])) | if ('\0' == *cp && (white || ' ' == cp[-1])) | ||
(*msg)(MANDOCERR_EOLNSPACE, data, ln, *pos, NULL); | mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL); | ||
return(start); | return(start); | ||
} | } | ||
static int | static int | ||
a2time(time_t *t, const char *fmt, const char *p) | a2time(time_t *t, const char *fmt, const char *p) | ||
{ | { | ||
|
|
||
return(0); | return(0); | ||
} | } | ||
static char * | static char * | ||
time2a(time_t t) | time2a(time_t t) | ||
{ | { | ||
|
|
||
return(NULL); | return(NULL); | ||
} | } | ||
char * | char * | ||
mandoc_normdate(char *in, mandocmsg msg, void *data, int ln, int pos) | mandoc_normdate(struct mparse *parse, char *in, int ln, int pos) | ||
{ | { | ||
char *out; | char *out; | ||
time_t t; | time_t t; | ||
if (NULL == in || '\0' == *in || | if (NULL == in || '\0' == *in || | ||
0 == strcmp(in, "$" "Mdocdate$")) { | 0 == strcmp(in, "$" "Mdocdate$")) { | ||
(*msg)(MANDOCERR_NODATE, data, ln, pos, NULL); | mandoc_msg(MANDOCERR_NODATE, parse, ln, pos, NULL); | ||
time(&t); | time(&t); | ||
} | } | ||
else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) && | else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) && | ||
!a2time(&t, "%b %d, %Y", in) && | !a2time(&t, "%b %d, %Y", in) && | ||
!a2time(&t, "%Y-%m-%d", in)) { | !a2time(&t, "%Y-%m-%d", in)) { | ||
(*msg)(MANDOCERR_BADDATE, data, ln, pos, NULL); | mandoc_msg(MANDOCERR_BADDATE, parse, ln, pos, NULL); | ||
t = 0; | t = 0; | ||
} | } | ||
out = t ? time2a(t) : NULL; | out = t ? time2a(t) : NULL; | ||
return(out ? out : mandoc_strdup(in)); | return(out ? out : mandoc_strdup(in)); | ||
} | } | ||
int | int | ||
mandoc_eos(const char *p, size_t sz, int enclosed) | mandoc_eos(const char *p, size_t sz, int enclosed) | ||
{ | { | ||
|
|
||
/* | /* | ||
* End-of-sentence recognition must include situations where | * End-of-sentence recognition must include situations where | ||
* some symbols, such as `)', allow prior EOS punctuation to | * some symbols, such as `)', allow prior EOS punctuation to | ||
* propogate outward. | * propagate outward. | ||
*/ | */ | ||
found = 0; | found = 0; | ||
|
|
||
return(found && !enclosed); | return(found && !enclosed); | ||
} | } | ||
int | int | ||
mandoc_hyph(const char *start, const char *c) | mandoc_hyph(const char *start, const char *c) | ||
{ | { | ||
|
|
||
return(1); | return(1); | ||
} | } | ||
/*
 * Decide whether the text at cp[*ppos] opens a macro (control) line.
 * A control line starts with `.', with `'', or with the two-character
 * sequence `\.'.  On a match, *ppos is advanced past that introducer
 * and past any spaces or tabs directly following it, and 1 is
 * returned.  Otherwise 0 is returned and *ppos is left untouched.
 */
int
mandoc_getcontrol(const char *cp, int *ppos)
{
	const char	*s;

	s = cp + *ppos;

	switch (*s) {
	case '\\':
		/* A backslash only counts when it escapes a dot. */
		if (s[1] != '.')
			return 0;
		s += 2;
		break;
	case '.':
		/* FALLTHROUGH */
	case '\'':
		s++;
		break;
	default:
		return 0;
	}

	/* Swallow blanks between the control character and the macro. */
	while (*s == ' ' || *s == '\t')
		s++;

	*ppos = (int)(s - cp);
	return 1;
}
/*
 * Parse the first sz bytes of p as a non-negative integer in the
 * given base (passed straight to strtol(3)).
 * Returns the parsed value as an int, or -1 when the input is longer
 * than 31 bytes, is empty, has trailing characters strtol() did not
 * consume, overflows a long, is negative, or exceeds INT_MAX.
 * errno is reset to zero before the conversion.
 */
int
mandoc_strntou(const char *p, size_t sz, int base)
{
	char	 digits[32];
	char	*stop;
	long	 val;

	/* Keep one byte free for the terminating NUL. */
	if (sz >= sizeof(digits))
		return -1;

	/* strtol() needs a NUL-terminated string; p need not be one. */
	memcpy(digits, p, sz);
	digits[sz] = '\0';

	errno = 0;
	val = strtol(digits, &stop, base);

	/* Nothing to convert, or junk left after the number. */
	if (digits[0] == '\0' || *stop != '\0')
		return -1;

	/* The value did not fit into a long. */
	if (errno == ERANGE && (val == LONG_MAX || val == LONG_MIN))
		return -1;

	/* The value is negative or does not fit into an int. */
	if (val < 0 || val > INT_MAX)
		return -1;

	return (int)val;
}