![]() ![]() | ![]() |
version 1.59, 2011/09/18 14:14:15 | version 1.100, 2017/06/02 19:21:23 | ||
---|---|---|---|
|
|
||
/* $Id$ */ | /* $Id$ */ | ||
/* | /* | ||
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> | * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> | ||
* Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> | * Copyright (c) 2011-2015, 2017 Ingo Schwarze <schwarze@openbsd.org> | ||
* | * | ||
* Permission to use, copy, modify, and distribute this software for any | * Permission to use, copy, modify, and distribute this software for any | ||
* purpose with or without fee is hereby granted, provided that the above | * purpose with or without fee is hereby granted, provided that the above | ||
|
|
||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
*/ | */ | ||
#ifdef HAVE_CONFIG_H | |||
#include "config.h" | #include "config.h" | ||
#endif | |||
#include <sys/types.h> | #include <sys/types.h> | ||
|
|
||
#include <time.h> | #include <time.h> | ||
#include "mandoc.h" | #include "mandoc.h" | ||
#include "mandoc_aux.h" | |||
#include "libmandoc.h" | #include "libmandoc.h" | ||
#define DATESIZE 32 | |||
static int a2time(time_t *, const char *, const char *); | static int a2time(time_t *, const char *, const char *); | ||
static char *time2a(time_t); | static char *time2a(time_t); | ||
static int numescape(const char *); | |||
/* | |||
* Pass over recursive numerical expressions. The context of this | enum mandoc_esc | ||
* function is important: it's only called within character-terminating | mandoc_escape(const char **end, const char **start, int *sz) | ||
* escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial | |||
* recursion: we don't care about what's in these blocks. | |||
* This returns the number of characters skipped or -1 if an error | |||
* occurs (the caller should bail). | |||
*/ | |||
static int | |||
numescape(const char *start) | |||
{ | { | ||
int i; | const char *local_start; | ||
size_t sz; | int local_sz; | ||
const char *cp; | char term; | ||
enum mandoc_esc gly; | |||
i = 0; | /* | ||
* When the caller doesn't provide return storage, | |||
* use local storage. | |||
*/ | |||
/* The expression consists of a subexpression. */ | if (NULL == start) | ||
start = &local_start; | |||
if (NULL == sz) | |||
sz = &local_sz; | |||
if ('\\' == start[i]) { | |||
cp = &start[++i]; | |||
/* | |||
* Read past the end of the subexpression. | |||
* Bail immediately on errors. | |||
*/ | |||
if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL)) | |||
return(-1); | |||
return(i + cp - &start[i]); | |||
} | |||
if ('(' != start[i++]) | |||
return(0); | |||
/* | /* | ||
* A parenthesised subexpression. Read until the closing | * Beyond the backslash, at least one input character | ||
* parenthesis, making sure to handle any nested subexpressions | * is part of the escape sequence. With one exception | ||
* that might ruin our parse. | * (see below), that character won't be returned. | ||
*/ | */ | ||
while (')' != start[i]) { | |||
sz = strcspn(&start[i], ")\\"); | |||
i += (int)sz; | |||
if ('\0' == start[i]) | |||
return(-1); | |||
else if ('\\' != start[i]) | |||
continue; | |||
cp = &start[++i]; | |||
if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL)) | |||
return(-1); | |||
i += cp - &start[i]; | |||
} | |||
/* Read past the terminating ')'. */ | |||
return(++i); | |||
} | |||
enum mandoc_esc | |||
mandoc_escape(const char **end, const char **start, int *sz) | |||
{ | |||
char c, term, numeric; | |||
int i, lim, ssz, rlim; | |||
const char *cp, *rstart; | |||
enum mandoc_esc gly; | |||
cp = *end; | |||
rstart = cp; | |||
if (start) | |||
*start = rstart; | |||
i = lim = 0; | |||
gly = ESCAPE_ERROR; | gly = ESCAPE_ERROR; | ||
term = numeric = '\0'; | *start = ++*end; | ||
*sz = 0; | |||
term = '\0'; | |||
switch ((c = cp[i++])) { | switch ((*start)[-1]) { | ||
/* | /* | ||
* First the glyphs. There are several different forms of | * First the glyphs. There are several different forms of | ||
* these, but each eventually returns a substring of the glyph | * these, but each eventually returns a substring of the glyph | ||
* name. | * name. | ||
*/ | */ | ||
case ('('): | case '(': | ||
gly = ESCAPE_SPECIAL; | gly = ESCAPE_SPECIAL; | ||
lim = 2; | *sz = 2; | ||
break; | break; | ||
case ('['): | case '[': | ||
gly = ESCAPE_SPECIAL; | gly = ESCAPE_SPECIAL; | ||
/* | |||
* Unicode escapes are defined in groff as \[uXXXX] to | |||
* \[u10FFFF], where the contained value must be a valid | |||
* Unicode codepoint. Here, however, only check whether | |||
* it's not a zero-width escape. | |||
*/ | |||
if ('u' == cp[i] && ']' != cp[i + 1]) | |||
gly = ESCAPE_UNICODE; | |||
term = ']'; | term = ']'; | ||
break; | break; | ||
case ('C'): | case 'C': | ||
if ('\'' != cp[i]) | if ('\'' != **start) | ||
return(ESCAPE_ERROR); | return ESCAPE_ERROR; | ||
*start = ++*end; | |||
gly = ESCAPE_SPECIAL; | gly = ESCAPE_SPECIAL; | ||
term = '\''; | term = '\''; | ||
break; | break; | ||
/* | /* | ||
* Escapes taking no arguments at all. | |||
*/ | |||
case 'd': | |||
case 'u': | |||
case ',': | |||
case '/': | |||
return ESCAPE_IGNORE; | |||
/* | |||
* The \z escape is supposed to output the following | |||
* character without advancing the cursor position. | |||
* Since we are mostly dealing with terminal mode, | |||
* let us just skip the next character. | |||
*/ | |||
case 'z': | |||
return ESCAPE_SKIPCHAR; | |||
/* | |||
* Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where | * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where | ||
* 'X' is the trigger. These have opaque sub-strings. | * 'X' is the trigger. These have opaque sub-strings. | ||
*/ | */ | ||
case ('F'): | case 'F': | ||
case 'g': | |||
case 'k': | |||
case 'M': | |||
case 'm': | |||
case 'n': | |||
case 'V': | |||
case 'Y': | |||
gly = ESCAPE_IGNORE; | |||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('g'): | case 'f': | ||
/* FALLTHROUGH */ | |||
case ('k'): | |||
/* FALLTHROUGH */ | |||
case ('M'): | |||
/* FALLTHROUGH */ | |||
case ('m'): | |||
/* FALLTHROUGH */ | |||
case ('n'): | |||
/* FALLTHROUGH */ | |||
case ('V'): | |||
/* FALLTHROUGH */ | |||
case ('Y'): | |||
if (ESCAPE_ERROR == gly) | if (ESCAPE_ERROR == gly) | ||
gly = ESCAPE_IGNORE; | |||
/* FALLTHROUGH */ | |||
case ('f'): | |||
if (ESCAPE_ERROR == gly) | |||
gly = ESCAPE_FONT; | gly = ESCAPE_FONT; | ||
switch (**start) { | |||
rstart= &cp[i]; | case '(': | ||
if (start) | *start = ++*end; | ||
*start = rstart; | *sz = 2; | ||
switch (cp[i++]) { | |||
case ('('): | |||
lim = 2; | |||
break; | break; | ||
case ('['): | case '[': | ||
*start = ++*end; | |||
term = ']'; | term = ']'; | ||
break; | break; | ||
default: | default: | ||
lim = 1; | *sz = 1; | ||
i--; | |||
break; | break; | ||
} | } | ||
break; | break; | ||
|
|
||
/* | /* | ||
* These escapes are of the form \X'Y', where 'X' is the trigger | * These escapes are of the form \X'Y', where 'X' is the trigger | ||
* and 'Y' is any string. These have opaque sub-strings. | * and 'Y' is any string. These have opaque sub-strings. | ||
* The \B and \w escapes are handled in roff.c, roff_res(). | |||
*/ | */ | ||
case ('A'): | case 'A': | ||
/* FALLTHROUGH */ | case 'b': | ||
case ('b'): | case 'D': | ||
/* FALLTHROUGH */ | case 'R': | ||
case ('D'): | case 'X': | ||
/* FALLTHROUGH */ | case 'Z': | ||
case ('o'): | |||
/* FALLTHROUGH */ | |||
case ('R'): | |||
/* FALLTHROUGH */ | |||
case ('X'): | |||
/* FALLTHROUGH */ | |||
case ('Z'): | |||
if ('\'' != cp[i++]) | |||
return(ESCAPE_ERROR); | |||
gly = ESCAPE_IGNORE; | gly = ESCAPE_IGNORE; | ||
term = '\''; | /* FALLTHROUGH */ | ||
case 'o': | |||
if (**start == '\0') | |||
return ESCAPE_ERROR; | |||
if (gly == ESCAPE_ERROR) | |||
gly = ESCAPE_OVERSTRIKE; | |||
term = **start; | |||
*start = ++*end; | |||
break; | break; | ||
/* | /* | ||
* These escapes are of the form \X'N', where 'X' is the trigger | * These escapes are of the form \X'N', where 'X' is the trigger | ||
* and 'N' resolves to a numerical expression. | * and 'N' resolves to a numerical expression. | ||
*/ | */ | ||
case ('B'): | case 'h': | ||
/* FALLTHROUGH */ | case 'H': | ||
case ('h'): | case 'L': | ||
/* FALLTHROUGH */ | case 'l': | ||
case ('H'): | case 'S': | ||
/* FALLTHROUGH */ | case 'v': | ||
case ('L'): | case 'x': | ||
/* FALLTHROUGH */ | if (strchr(" %&()*+-./0123456789:<=>", **start)) { | ||
case ('l'): | if ('\0' != **start) | ||
/* FALLTHROUGH */ | ++*end; | ||
case ('N'): | return ESCAPE_ERROR; | ||
if (ESCAPE_ERROR == gly) | } | ||
gly = ESCAPE_NUMBERED; | switch ((*start)[-1]) { | ||
/* FALLTHROUGH */ | case 'h': | ||
case ('S'): | gly = ESCAPE_HORIZ; | ||
/* FALLTHROUGH */ | break; | ||
case ('v'): | case 'l': | ||
/* FALLTHROUGH */ | gly = ESCAPE_HLINE; | ||
case ('w'): | break; | ||
/* FALLTHROUGH */ | default: | ||
case ('x'): | |||
if (ESCAPE_ERROR == gly) | |||
gly = ESCAPE_IGNORE; | gly = ESCAPE_IGNORE; | ||
if ('\'' != cp[i++]) | break; | ||
return(ESCAPE_ERROR); | } | ||
term = numeric = '\''; | term = **start; | ||
*start = ++*end; | |||
break; | break; | ||
/* | /* | ||
* Special handling for the numbered character escape. | |||
* XXX Do any other escapes need similar handling? | |||
*/ | |||
case 'N': | |||
if ('\0' == **start) | |||
return ESCAPE_ERROR; | |||
(*end)++; | |||
if (isdigit((unsigned char)**start)) { | |||
*sz = 1; | |||
return ESCAPE_IGNORE; | |||
} | |||
(*start)++; | |||
while (isdigit((unsigned char)**end)) | |||
(*end)++; | |||
*sz = *end - *start; | |||
if ('\0' != **end) | |||
(*end)++; | |||
return ESCAPE_NUMBERED; | |||
/* | |||
* Sizes get a special category of their own. | * Sizes get a special category of their own. | ||
*/ | */ | ||
case ('s'): | case 's': | ||
gly = ESCAPE_IGNORE; | gly = ESCAPE_IGNORE; | ||
rstart = &cp[i]; | |||
if (start) | |||
*start = rstart; | |||
/* See +/- counts as a sign. */ | /* See +/- counts as a sign. */ | ||
c = cp[i]; | if ('+' == **end || '-' == **end || ASCII_HYPH == **end) | ||
if ('+' == c || '-' == c || ASCII_HYPH == c) | *start = ++*end; | ||
++i; | |||
switch (cp[i++]) { | switch (**end) { | ||
case ('('): | case '(': | ||
lim = 2; | *start = ++*end; | ||
*sz = 2; | |||
break; | break; | ||
case ('['): | case '[': | ||
term = numeric = ']'; | *start = ++*end; | ||
term = ']'; | |||
break; | break; | ||
case ('\''): | case '\'': | ||
term = numeric = '\''; | *start = ++*end; | ||
term = '\''; | |||
break; | break; | ||
case '3': | |||
case '2': | |||
case '1': | |||
*sz = (*end)[-1] == 's' && | |||
isdigit((unsigned char)(*end)[1]) ? 2 : 1; | |||
break; | |||
default: | default: | ||
lim = 1; | *sz = 1; | ||
i--; | |||
break; | break; | ||
} | } | ||
/* See +/- counts as a sign. */ | |||
c = cp[i]; | |||
if ('+' == c || '-' == c || ASCII_HYPH == c) | |||
++i; | |||
break; | break; | ||
/* | /* | ||
* Anything else is assumed to be a glyph. | * Anything else is assumed to be a glyph. | ||
* In this case, pass back the character after the backslash. | |||
*/ | */ | ||
default: | default: | ||
gly = ESCAPE_SPECIAL; | gly = ESCAPE_SPECIAL; | ||
lim = 1; | *start = --*end; | ||
i--; | *sz = 1; | ||
break; | break; | ||
} | } | ||
assert(ESCAPE_ERROR != gly); | assert(ESCAPE_ERROR != gly); | ||
rstart = &cp[i]; | |||
if (start) | |||
*start = rstart; | |||
/* | /* | ||
* If a terminating block has been specified, we need to | * Read up to the terminating character, | ||
* handle the case of recursion, which could have its | * paying attention to nested escapes. | ||
* own terminating blocks that mess up our parse. This, by the | |||
* way, means that the "start" and "size" values will be | |||
* effectively meaningless. | |||
*/ | */ | ||
ssz = 0; | |||
if (numeric && -1 == (ssz = numescape(&cp[i]))) | |||
return(ESCAPE_ERROR); | |||
i += ssz; | |||
rlim = -1; | |||
/* | |||
* We have a character terminator. Try to read up to that | |||
* character. If we can't (i.e., we hit the nil), then return | |||
* an error; if we can, calculate our length, read past the | |||
* terminating character, and exit. | |||
*/ | |||
if ('\0' != term) { | if ('\0' != term) { | ||
*end = strchr(&cp[i], term); | while (**end != term) { | ||
if ('\0' == *end) | switch (**end) { | ||
return(ESCAPE_ERROR); | case '\0': | ||
return ESCAPE_ERROR; | |||
rlim = *end - &cp[i]; | case '\\': | ||
if (sz) | (*end)++; | ||
*sz = rlim; | if (ESCAPE_ERROR == | ||
(*end)++; | mandoc_escape(end, NULL, NULL)) | ||
goto out; | return ESCAPE_ERROR; | ||
break; | |||
default: | |||
(*end)++; | |||
break; | |||
} | |||
} | |||
*sz = (*end)++ - *start; | |||
} else { | |||
assert(*sz > 0); | |||
if ((size_t)*sz > strlen(*start)) | |||
return ESCAPE_ERROR; | |||
*end += *sz; | |||
} | } | ||
assert(lim > 0); | |||
/* | |||
* We have a numeric limit. If the string is shorter than that, | |||
* stop and return an error. Else adjust our endpoint, length, | |||
* and return the current glyph. | |||
*/ | |||
if ((size_t)lim > strlen(&cp[i])) | |||
return(ESCAPE_ERROR); | |||
rlim = lim; | |||
if (sz) | |||
*sz = rlim; | |||
*end = &cp[i] + lim; | |||
out: | |||
assert(rlim >= 0 && rstart); | |||
/* Run post-processors. */ | /* Run post-processors. */ | ||
switch (gly) { | switch (gly) { | ||
case (ESCAPE_FONT): | case ESCAPE_FONT: | ||
if (1 != rlim) | if (2 == *sz) { | ||
if ('C' == **start) { | |||
/* | |||
* Treat constant-width font modes | |||
* just like regular font modes. | |||
*/ | |||
(*start)++; | |||
(*sz)--; | |||
} else { | |||
if ('B' == (*start)[0] && 'I' == (*start)[1]) | |||
gly = ESCAPE_FONTBI; | |||
break; | |||
} | |||
} else if (1 != *sz) | |||
break; | break; | ||
switch (*rstart) { | |||
case ('3'): | switch (**start) { | ||
/* FALLTHROUGH */ | case '3': | ||
case ('B'): | case 'B': | ||
gly = ESCAPE_FONTBOLD; | gly = ESCAPE_FONTBOLD; | ||
break; | break; | ||
case ('2'): | case '2': | ||
/* FALLTHROUGH */ | case 'I': | ||
case ('I'): | |||
gly = ESCAPE_FONTITALIC; | gly = ESCAPE_FONTITALIC; | ||
break; | break; | ||
case ('P'): | case 'P': | ||
gly = ESCAPE_FONTPREV; | gly = ESCAPE_FONTPREV; | ||
break; | break; | ||
case ('1'): | case '1': | ||
/* FALLTHROUGH */ | case 'R': | ||
case ('R'): | |||
gly = ESCAPE_FONTROMAN; | gly = ESCAPE_FONTROMAN; | ||
break; | break; | ||
} | } | ||
break; | break; | ||
case (ESCAPE_SPECIAL): | case ESCAPE_SPECIAL: | ||
if (1 != rlim) | if (1 == *sz && 'c' == **start) | ||
break; | |||
if ('c' == *rstart) | |||
gly = ESCAPE_NOSPACE; | gly = ESCAPE_NOSPACE; | ||
/* | |||
* Unicode escapes are defined in groff as \[u0000] | |||
* to \[u10FFFF], where the contained value must be | |||
* a valid Unicode codepoint. Here, however, only | |||
* check the length and range. | |||
*/ | |||
if (**start != 'u' || *sz < 5 || *sz > 7) | |||
break; | |||
if (*sz == 7 && ((*start)[1] != '1' || (*start)[2] != '0')) | |||
break; | |||
if (*sz == 6 && (*start)[1] == '0') | |||
break; | |||
if (*sz == 5 && (*start)[1] == 'D' && | |||
strchr("89ABCDEF", (*start)[2]) != NULL) | |||
break; | |||
if ((int)strspn(*start + 1, "0123456789ABCDEFabcdef") | |||
+ 1 == *sz) | |||
gly = ESCAPE_UNICODE; | |||
break; | break; | ||
default: | default: | ||
break; | break; | ||
} | } | ||
return(gly); | return gly; | ||
} | } | ||
void * | |||
mandoc_calloc(size_t num, size_t size) | |||
{ | |||
void *ptr; | |||
ptr = calloc(num, size); | |||
if (NULL == ptr) { | |||
perror(NULL); | |||
exit((int)MANDOCLEVEL_SYSERR); | |||
} | |||
return(ptr); | |||
} | |||
void * | |||
mandoc_malloc(size_t size) | |||
{ | |||
void *ptr; | |||
ptr = malloc(size); | |||
if (NULL == ptr) { | |||
perror(NULL); | |||
exit((int)MANDOCLEVEL_SYSERR); | |||
} | |||
return(ptr); | |||
} | |||
void * | |||
mandoc_realloc(void *ptr, size_t size) | |||
{ | |||
ptr = realloc(ptr, size); | |||
if (NULL == ptr) { | |||
perror(NULL); | |||
exit((int)MANDOCLEVEL_SYSERR); | |||
} | |||
return(ptr); | |||
} | |||
char * | |||
mandoc_strndup(const char *ptr, size_t sz) | |||
{ | |||
char *p; | |||
p = mandoc_malloc(sz + 1); | |||
memcpy(p, ptr, sz); | |||
p[(int)sz] = '\0'; | |||
return(p); | |||
} | |||
char * | |||
mandoc_strdup(const char *ptr) | |||
{ | |||
char *p; | |||
p = strdup(ptr); | |||
if (NULL == p) { | |||
perror(NULL); | |||
exit((int)MANDOCLEVEL_SYSERR); | |||
} | |||
return(p); | |||
} | |||
/* | /* | ||
* Parse a quoted or unquoted roff-style request or macro argument. | * Parse a quoted or unquoted roff-style request or macro argument. | ||
* Return a pointer to the parsed argument, which is either the original | * Return a pointer to the parsed argument, which is either the original | ||
* pointer or advanced by one byte in case the argument is quoted. | * pointer or advanced by one byte in case the argument is quoted. | ||
* Null-terminate the argument in place. | * NUL-terminate the argument in place. | ||
* Collapse pairs of quotes inside quoted arguments. | * Collapse pairs of quotes inside quoted arguments. | ||
* Advance the argument pointer to the next argument, | * Advance the argument pointer to the next argument, | ||
* or to the null byte terminating the argument line. | * or to the NUL byte terminating the argument line. | ||
*/ | */ | ||
char * | char * | ||
mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos) | mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos) | ||
|
|
||
if ('"' == *start) { | if ('"' == *start) { | ||
quoted = 1; | quoted = 1; | ||
start++; | start++; | ||
} | } | ||
pairs = 0; | pairs = 0; | ||
white = 0; | white = 0; | ||
for (cp = start; '\0' != *cp; cp++) { | for (cp = start; '\0' != *cp; cp++) { | ||
/* Move left after quoted quotes and escaped backslashes. */ | |||
/* | |||
* Move the following text left | |||
* after quoted quotes and after "\\" and "\t". | |||
*/ | |||
if (pairs) | if (pairs) | ||
cp[-pairs] = cp[0]; | cp[-pairs] = cp[0]; | ||
if ('\\' == cp[0]) { | if ('\\' == cp[0]) { | ||
if ('\\' == cp[1]) { | /* | ||
/* Poor man's copy mode. */ | * In copy mode, translate double to single | ||
* backslashes and backslash-t to literal tabs. | |||
*/ | |||
switch (cp[1]) { | |||
case 't': | |||
cp[0] = '\t'; | |||
/* FALLTHROUGH */ | |||
case '\\': | |||
pairs++; | pairs++; | ||
cp++; | cp++; | ||
} else if (0 == quoted && ' ' == cp[1]) | break; | ||
case ' ': | |||
/* Skip escaped blanks. */ | /* Skip escaped blanks. */ | ||
cp++; | if (0 == quoted) | ||
cp++; | |||
break; | |||
default: | |||
break; | |||
} | |||
} else if (0 == quoted) { | } else if (0 == quoted) { | ||
if (' ' == cp[0]) { | if (' ' == cp[0]) { | ||
/* Unescaped blanks end unquoted args. */ | /* Unescaped blanks end unquoted args. */ | ||
|
|
||
/* Quoted argument without a closing quote. */ | /* Quoted argument without a closing quote. */ | ||
if (1 == quoted) | if (1 == quoted) | ||
mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL); | mandoc_msg(MANDOCERR_ARG_QUOTE, parse, ln, *pos, NULL); | ||
/* Null-terminate this argument and move to the next one. */ | /* NUL-terminate this argument and move to the next one. */ | ||
if (pairs) | if (pairs) | ||
cp[-pairs] = '\0'; | cp[-pairs] = '\0'; | ||
if ('\0' != *cp) { | if ('\0' != *cp) { | ||
|
|
||
*cpp = cp; | *cpp = cp; | ||
if ('\0' == *cp && (white || ' ' == cp[-1])) | if ('\0' == *cp && (white || ' ' == cp[-1])) | ||
mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL); | mandoc_msg(MANDOCERR_SPACE_EOL, parse, ln, *pos, NULL); | ||
return(start); | return start; | ||
} | } | ||
static int | static int | ||
|
|
||
memset(&tm, 0, sizeof(struct tm)); | memset(&tm, 0, sizeof(struct tm)); | ||
pp = NULL; | pp = NULL; | ||
#ifdef HAVE_STRPTIME | #if HAVE_STRPTIME | ||
pp = strptime(p, fmt, &tm); | pp = strptime(p, fmt, &tm); | ||
#endif | #endif | ||
if (NULL != pp && '\0' == *pp) { | if (NULL != pp && '\0' == *pp) { | ||
*t = mktime(&tm); | *t = mktime(&tm); | ||
return(1); | return 1; | ||
} | } | ||
return(0); | return 0; | ||
} | } | ||
static char * | static char * | ||
|
|
||
int isz; | int isz; | ||
tm = localtime(&t); | tm = localtime(&t); | ||
if (tm == NULL) | |||
return NULL; | |||
/* | /* | ||
* Reserve space: | * Reserve space: | ||
|
|
||
* up to 2 characters for the day + comma + blank | * up to 2 characters for the day + comma + blank | ||
* 4 characters for the year and a terminating '\0' | * 4 characters for the year and a terminating '\0' | ||
*/ | */ | ||
p = buf = mandoc_malloc(10 + 4 + 4 + 1); | p = buf = mandoc_malloc(10 + 4 + 4 + 1); | ||
if (0 == (ssz = strftime(p, 10 + 1, "%B ", tm))) | if ((ssz = strftime(p, 10 + 1, "%B ", tm)) == 0) | ||
goto fail; | goto fail; | ||
p += (int)ssz; | p += (int)ssz; | ||
if (-1 == (isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday))) | /* | ||
* The output format is just "%d" here, not "%2d" or "%02d". | |||
* That's also the reason why we can't just format the | |||
* date as a whole with "%B %e, %Y" or "%B %d, %Y". | |||
* Besides, the present approach is less prone to buffer | |||
* overflows, in case anybody should ever introduce the bug | |||
* of looking at LC_TIME. | |||
*/ | |||
if ((isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday)) == -1) | |||
goto fail; | goto fail; | ||
p += isz; | p += isz; | ||
if (0 == strftime(p, 4 + 1, "%Y", tm)) | if (strftime(p, 4 + 1, "%Y", tm) == 0) | ||
goto fail; | goto fail; | ||
return(buf); | return buf; | ||
fail: | fail: | ||
free(buf); | free(buf); | ||
return(NULL); | return NULL; | ||
} | } | ||
char * | char * | ||
mandoc_normdate(struct mparse *parse, char *in, int ln, int pos) | mandoc_normdate(struct mparse *parse, char *in, int ln, int pos) | ||
{ | { | ||
char *out; | |||
time_t t; | time_t t; | ||
if (NULL == in || '\0' == *in || | /* No date specified: use today's date. */ | ||
0 == strcmp(in, "$" "Mdocdate$")) { | |||
mandoc_msg(MANDOCERR_NODATE, parse, ln, pos, NULL); | if (in == NULL || *in == '\0' || strcmp(in, "$" "Mdocdate$") == 0) { | ||
time(&t); | mandoc_msg(MANDOCERR_DATE_MISSING, parse, ln, pos, NULL); | ||
return time2a(time(NULL)); | |||
} | } | ||
else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) && | |||
!a2time(&t, "%b %d, %Y", in) && | /* Valid mdoc(7) date format. */ | ||
!a2time(&t, "%Y-%m-%d", in)) { | |||
mandoc_msg(MANDOCERR_BADDATE, parse, ln, pos, NULL); | if (a2time(&t, "$" "Mdocdate: %b %d %Y $", in) || | ||
t = 0; | a2time(&t, "%b %d, %Y", in)) | ||
} | return time2a(t); | ||
out = t ? time2a(t) : NULL; | |||
return(out ? out : mandoc_strdup(in)); | /* Do not warn about the legacy man(7) format. */ | ||
if ( ! a2time(&t, "%Y-%m-%d", in)) | |||
mandoc_msg(MANDOCERR_DATE_BAD, parse, ln, pos, in); | |||
/* Use any non-mdoc(7) date verbatim. */ | |||
return mandoc_strdup(in); | |||
} | } | ||
int | int | ||
mandoc_eos(const char *p, size_t sz, int enclosed) | mandoc_eos(const char *p, size_t sz) | ||
{ | { | ||
const char *q; | const char *q; | ||
int found; | int enclosed, found; | ||
if (0 == sz) | if (0 == sz) | ||
return(0); | return 0; | ||
/* | /* | ||
* End-of-sentence recognition must include situations where | * End-of-sentence recognition must include situations where | ||
|
|
||
* propagate outward. | * propagate outward. | ||
*/ | */ | ||
found = 0; | enclosed = found = 0; | ||
for (q = p + (int)sz - 1; q >= p; q--) { | for (q = p + (int)sz - 1; q >= p; q--) { | ||
switch (*q) { | switch (*q) { | ||
case ('\"'): | case '\"': | ||
/* FALLTHROUGH */ | case '\'': | ||
case ('\''): | case ']': | ||
/* FALLTHROUGH */ | case ')': | ||
case (']'): | |||
/* FALLTHROUGH */ | |||
case (')'): | |||
if (0 == found) | if (0 == found) | ||
enclosed = 1; | enclosed = 1; | ||
break; | break; | ||
case ('.'): | case '.': | ||
/* FALLTHROUGH */ | case '!': | ||
case ('!'): | case '?': | ||
/* FALLTHROUGH */ | |||
case ('?'): | |||
found = 1; | found = 1; | ||
break; | break; | ||
default: | default: | ||
return(found && (!enclosed || isalnum((unsigned char)*q))); | return found && | ||
(!enclosed || isalnum((unsigned char)*q)); | |||
} | } | ||
} | } | ||
return(found && !enclosed); | return found && !enclosed; | ||
} | } | ||
/* | /* | ||
* Find out whether a line is a macro line or not. If it is, adjust the | |||
* current position and return one; if it isn't, return zero and don't | |||
* change the current position. | |||
*/ | |||
int | |||
mandoc_getcontrol(const char *cp, int *ppos) | |||
{ | |||
int pos; | |||
pos = *ppos; | |||
if ('\\' == cp[pos] && '.' == cp[pos + 1]) | |||
pos += 2; | |||
else if ('.' == cp[pos] || '\'' == cp[pos]) | |||
pos++; | |||
else | |||
return(0); | |||
while (' ' == cp[pos] || '\t' == cp[pos]) | |||
pos++; | |||
*ppos = pos; | |||
return(1); | |||
} | |||
/* | |||
* Convert a string to a long that may not be <0. | * Convert a string to a long that may not be <0. | ||
* If the string is invalid, or is less than 0, return -1. | * If the string is invalid, or is less than 0, return -1. | ||
*/ | */ | ||
|
|
||
long v; | long v; | ||
if (sz > 31) | if (sz > 31) | ||
return(-1); | return -1; | ||
memcpy(buf, p, sz); | memcpy(buf, p, sz); | ||
buf[(int)sz] = '\0'; | buf[(int)sz] = '\0'; | ||
|
|
||
v = strtol(buf, &ep, base); | v = strtol(buf, &ep, base); | ||
if (buf[0] == '\0' || *ep != '\0') | if (buf[0] == '\0' || *ep != '\0') | ||
return(-1); | return -1; | ||
if (v > INT_MAX) | if (v > INT_MAX) | ||
v = INT_MAX; | v = INT_MAX; | ||
if (v < INT_MIN) | if (v < INT_MIN) | ||
v = INT_MIN; | v = INT_MIN; | ||
return((int)v); | return (int)v; | ||
} | } |