Return to mandoc.c CVS log | Up to [cvsweb.bsd.lv] / mandoc |
version 1.37, 2011/03/07 01:35:51 | version 1.70, 2013/11/10 21:34:04 | ||
---|---|---|---|
|
|
||
/* $Id$ */ | /* $Id$ */ | ||
/* | /* | ||
* Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> | * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> | ||
* Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> | * Copyright (c) 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org> | ||
* | * | ||
* Permission to use, copy, modify, and distribute this software for any | * Permission to use, copy, modify, and distribute this software for any | ||
* purpose with or without fee is hereby granted, provided that the above | * purpose with or without fee is hereby granted, provided that the above | ||
|
|
||
#include <assert.h> | #include <assert.h> | ||
#include <ctype.h> | #include <ctype.h> | ||
#include <errno.h> | |||
#include <limits.h> | |||
#include <stdlib.h> | #include <stdlib.h> | ||
#include <stdio.h> | #include <stdio.h> | ||
#include <string.h> | #include <string.h> | ||
|
|
||
static int a2time(time_t *, const char *, const char *); | static int a2time(time_t *, const char *, const char *); | ||
static char *time2a(time_t); | static char *time2a(time_t); | ||
int | |||
mandoc_special(char *p) | enum mandoc_esc | ||
mandoc_escape(const char const **end, const char const **start, int *sz) | |||
{ | { | ||
int len, i; | const char *local_start; | ||
int local_sz; | |||
char term; | char term; | ||
char *sv; | enum mandoc_esc gly; | ||
len = 0; | /* | ||
* When the caller doesn't provide return storage, | |||
* use local storage. | |||
*/ | |||
if (NULL == start) | |||
start = &local_start; | |||
if (NULL == sz) | |||
sz = &local_sz; | |||
/* | |||
* Beyond the backslash, at least one input character | |||
* is part of the escape sequence. With one exception | |||
* (see below), that character won't be returned. | |||
*/ | |||
gly = ESCAPE_ERROR; | |||
*start = ++*end; | |||
*sz = 0; | |||
term = '\0'; | term = '\0'; | ||
sv = p; | |||
assert('\\' == *p); | switch ((*start)[-1]) { | ||
p++; | /* | ||
* First the glyphs. There are several different forms of | |||
* these, but each eventually returns a substring of the glyph | |||
* name. | |||
*/ | |||
case ('('): | |||
gly = ESCAPE_SPECIAL; | |||
*sz = 2; | |||
break; | |||
case ('['): | |||
gly = ESCAPE_SPECIAL; | |||
/* | |||
* Unicode escapes are defined in groff as \[uXXXX] to | |||
* \[u10FFFF], where the contained value must be a valid | |||
* Unicode codepoint. Here, however, only check whether | |||
* it's not a zero-width escape. | |||
*/ | |||
if ('u' == (*start)[0] && ']' != (*start)[1]) | |||
gly = ESCAPE_UNICODE; | |||
term = ']'; | |||
break; | |||
case ('C'): | |||
if ('\'' != **start) | |||
return(ESCAPE_ERROR); | |||
*start = ++*end; | |||
if ('u' == (*start)[0] && '\'' != (*start)[1]) | |||
gly = ESCAPE_UNICODE; | |||
else | |||
gly = ESCAPE_SPECIAL; | |||
term = '\''; | |||
break; | |||
switch (*p++) { | /* | ||
#if 0 | * The \z escape is supposed to output the following | ||
case ('Z'): | * character without advancing the cursor position. | ||
* Since we are mostly dealing with terminal mode, | |||
* let us just skip the next character. | |||
*/ | |||
case ('z'): | |||
return(ESCAPE_SKIPCHAR); | |||
/* | |||
* Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where | |||
* 'X' is the trigger. These have opaque sub-strings. | |||
*/ | |||
case ('F'): | |||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('X'): | case ('g'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('x'): | case ('k'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('S'): | case ('M'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('R'): | case ('m'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('N'): | case ('n'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('l'): | case ('V'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('L'): | case ('Y'): | ||
gly = ESCAPE_IGNORE; | |||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('H'): | case ('f'): | ||
if (ESCAPE_ERROR == gly) | |||
gly = ESCAPE_FONT; | |||
switch (**start) { | |||
case ('('): | |||
*start = ++*end; | |||
*sz = 2; | |||
break; | |||
case ('['): | |||
*start = ++*end; | |||
term = ']'; | |||
break; | |||
default: | |||
*sz = 1; | |||
break; | |||
} | |||
break; | |||
/* | |||
* These escapes are of the form \X'Y', where 'X' is the trigger | |||
* and 'Y' is any string. These have opaque sub-strings. | |||
*/ | |||
case ('A'): | |||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('h'): | case ('b'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('D'): | case ('D'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('C'): | case ('o'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('b'): | case ('R'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('B'): | case ('X'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('a'): | case ('Z'): | ||
/* FALLTHROUGH */ | if ('\'' != **start) | ||
case ('A'): | return(ESCAPE_ERROR); | ||
if (*p++ != '\'') | gly = ESCAPE_IGNORE; | ||
return(0); | *start = ++*end; | ||
term = '\''; | term = '\''; | ||
break; | break; | ||
#endif | |||
/* | |||
* These escapes are of the form \X'N', where 'X' is the trigger | |||
* and 'N' resolves to a numerical expression. | |||
*/ | |||
case ('B'): | |||
/* FALLTHROUGH */ | |||
case ('h'): | case ('h'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('H'): | |||
/* FALLTHROUGH */ | |||
case ('L'): | |||
/* FALLTHROUGH */ | |||
case ('l'): | |||
gly = ESCAPE_NUMBERED; | |||
/* FALLTHROUGH */ | |||
case ('S'): | |||
/* FALLTHROUGH */ | |||
case ('v'): | case ('v'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('s'): | case ('w'): | ||
if (ASCII_HYPH == *p) | /* FALLTHROUGH */ | ||
*p = '-'; | case ('x'): | ||
if ('\'' != **start) | |||
return(ESCAPE_ERROR); | |||
if (ESCAPE_ERROR == gly) | |||
gly = ESCAPE_IGNORE; | |||
*start = ++*end; | |||
term = '\''; | |||
break; | |||
i = 0; | /* | ||
if ('+' == *p || '-' == *p) { | * Special handling for the numbered character escape. | ||
p++; | * XXX Do any other escapes need similar handling? | ||
i = 1; | */ | ||
case ('N'): | |||
if ('\0' == **start) | |||
return(ESCAPE_ERROR); | |||
(*end)++; | |||
if (isdigit((unsigned char)**start)) { | |||
*sz = 1; | |||
return(ESCAPE_IGNORE); | |||
} | } | ||
(*start)++; | |||
while (isdigit((unsigned char)**end)) | |||
(*end)++; | |||
*sz = *end - *start; | |||
if ('\0' != **end) | |||
(*end)++; | |||
return(ESCAPE_NUMBERED); | |||
switch (*p++) { | /* | ||
* Sizes get a special category of their own. | |||
*/ | |||
case ('s'): | |||
gly = ESCAPE_IGNORE; | |||
/* See +/- counts as a sign. */ | |||
if ('+' == **end || '-' == **end || ASCII_HYPH == **end) | |||
(*end)++; | |||
switch (**end) { | |||
case ('('): | case ('('): | ||
len = 2; | *start = ++*end; | ||
*sz = 2; | |||
break; | break; | ||
case ('['): | case ('['): | ||
*start = ++*end; | |||
term = ']'; | term = ']'; | ||
break; | break; | ||
case ('\''): | case ('\''): | ||
*start = ++*end; | |||
term = '\''; | term = '\''; | ||
break; | break; | ||
case ('0'): | |||
i = 1; | |||
/* FALLTHROUGH */ | |||
default: | default: | ||
len = 1; | *sz = 1; | ||
p--; | |||
break; | break; | ||
} | } | ||
if (ASCII_HYPH == *p) | break; | ||
*p = '-'; | |||
if ('+' == *p || '-' == *p) { | |||
if (i) | |||
return(0); | |||
p++; | |||
} | |||
/* Handle embedded numerical subexp or escape. */ | |||
if ('(' == *p) { | /* | ||
while (*p && ')' != *p) | * Anything else is assumed to be a glyph. | ||
if ('\\' == *p++) { | * In this case, pass back the character after the backslash. | ||
i = mandoc_special(--p); | */ | ||
if (0 == i) | default: | ||
return(0); | gly = ESCAPE_SPECIAL; | ||
p += i; | *start = --*end; | ||
} | *sz = 1; | ||
break; | |||
} | |||
if (')' == *p++) | assert(ESCAPE_ERROR != gly); | ||
break; | |||
return(0); | /* | ||
} else if ('\\' == *p) { | * Read up to the terminating character, | ||
if (0 == (i = mandoc_special(p))) | * paying attention to nested escapes. | ||
return(0); | */ | ||
p += i; | |||
if ('\0' != term) { | |||
while (**end != term) { | |||
switch (**end) { | |||
case ('\0'): | |||
return(ESCAPE_ERROR); | |||
case ('\\'): | |||
(*end)++; | |||
if (ESCAPE_ERROR == | |||
mandoc_escape(end, NULL, NULL)) | |||
return(ESCAPE_ERROR); | |||
break; | |||
default: | |||
(*end)++; | |||
break; | |||
} | |||
} | } | ||
*sz = (*end)++ - *start; | |||
} else { | |||
assert(*sz > 0); | |||
if ((size_t)*sz > strlen(*start)) | |||
return(ESCAPE_ERROR); | |||
*end += *sz; | |||
} | |||
break; | /* Run post-processors. */ | ||
#if 0 | |||
case ('Y'): | switch (gly) { | ||
/* FALLTHROUGH */ | case (ESCAPE_FONT): | ||
case ('V'): | if (2 == *sz) { | ||
/* FALLTHROUGH */ | if ('C' == **start) { | ||
case ('$'): | /* | ||
/* FALLTHROUGH */ | * Treat constant-width font modes | ||
case ('n'): | * just like regular font modes. | ||
/* FALLTHROUGH */ | */ | ||
#endif | (*start)++; | ||
case ('k'): | (*sz)--; | ||
/* FALLTHROUGH */ | } else { | ||
case ('M'): | if ('B' == (*start)[0] && 'I' == (*start)[1]) | ||
/* FALLTHROUGH */ | gly = ESCAPE_FONTBI; | ||
case ('m'): | break; | ||
/* FALLTHROUGH */ | } | ||
case ('f'): | } else if (1 != *sz) | ||
/* FALLTHROUGH */ | |||
case ('F'): | |||
/* FALLTHROUGH */ | |||
case ('*'): | |||
switch (*p++) { | |||
case ('('): | |||
len = 2; | |||
break; | break; | ||
case ('['): | |||
term = ']'; | switch (**start) { | ||
case ('3'): | |||
/* FALLTHROUGH */ | |||
case ('B'): | |||
gly = ESCAPE_FONTBOLD; | |||
break; | break; | ||
default: | case ('2'): | ||
len = 1; | /* FALLTHROUGH */ | ||
p--; | case ('I'): | ||
gly = ESCAPE_FONTITALIC; | |||
break; | break; | ||
case ('P'): | |||
gly = ESCAPE_FONTPREV; | |||
break; | |||
case ('1'): | |||
/* FALLTHROUGH */ | |||
case ('R'): | |||
gly = ESCAPE_FONTROMAN; | |||
break; | |||
} | } | ||
break; | break; | ||
case ('('): | case (ESCAPE_SPECIAL): | ||
len = 2; | if (1 == *sz && 'c' == **start) | ||
gly = ESCAPE_NOSPACE; | |||
break; | break; | ||
case ('['): | |||
term = ']'; | |||
break; | |||
case ('z'): | |||
len = 1; | |||
if ('\\' == *p) { | |||
if (0 == (i = mandoc_special(p))) | |||
return(0); | |||
p += i; | |||
return(*p ? (int)(p - sv) : 0); | |||
} | |||
break; | |||
case ('o'): | |||
/* FALLTHROUGH */ | |||
case ('w'): | |||
if ('\'' == *p++) { | |||
term = '\''; | |||
break; | |||
} | |||
/* FALLTHROUGH */ | |||
default: | default: | ||
len = 1; | |||
p--; | |||
break; | break; | ||
} | } | ||
if (term) { | return(gly); | ||
for ( ; *p && term != *p; p++) | |||
if (ASCII_HYPH == *p) | |||
*p = '-'; | |||
return(*p ? (int)(p - sv) : 0); | |||
} | |||
for (i = 0; *p && i < len; i++, p++) | |||
if (ASCII_HYPH == *p) | |||
*p = '-'; | |||
return(i == len ? (int)(p - sv) : 0); | |||
} | } | ||
void * | void * | ||
mandoc_calloc(size_t num, size_t size) | mandoc_calloc(size_t num, size_t size) | ||
{ | { | ||
|
|
||
return(ptr); | return(ptr); | ||
} | } | ||
char * | |||
mandoc_strndup(const char *ptr, size_t sz) | |||
{ | |||
char *p; | |||
p = mandoc_malloc(sz + 1); | |||
memcpy(p, ptr, sz); | |||
p[(int)sz] = '\0'; | |||
return(p); | |||
} | |||
char * | char * | ||
mandoc_strdup(const char *ptr) | mandoc_strdup(const char *ptr) | ||
{ | { | ||
|
|
||
* or to the null byte terminating the argument line. | * or to the null byte terminating the argument line. | ||
*/ | */ | ||
char * | char * | ||
mandoc_getarg(char **cpp, mandocmsg msg, void *data, int ln, int *pos) | mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos) | ||
{ | { | ||
char *start, *cp; | char *start, *cp; | ||
int quoted, pairs, white; | int quoted, pairs, white; | ||
/* Quoting can only start with a new word. */ | /* Quoting can only start with a new word. */ | ||
start = *cpp; | start = *cpp; | ||
quoted = 0; | |||
if ('"' == *start) { | if ('"' == *start) { | ||
quoted = 1; | quoted = 1; | ||
start++; | start++; | ||
} else | } | ||
quoted = 0; | |||
pairs = 0; | pairs = 0; | ||
white = 0; | white = 0; | ||
for (cp = start; '\0' != *cp; cp++) { | for (cp = start; '\0' != *cp; cp++) { | ||
/* Move left after quoted quotes and escaped backslashes. */ | |||
/* | |||
* Move the following text left | |||
* after quoted quotes and after "\\" and "\t". | |||
*/ | |||
if (pairs) | if (pairs) | ||
cp[-pairs] = cp[0]; | cp[-pairs] = cp[0]; | ||
if ('\\' == cp[0]) { | if ('\\' == cp[0]) { | ||
if ('\\' == cp[1]) { | /* | ||
/* Poor man's copy mode. */ | * In copy mode, translate double to single | ||
* backslashes and backslash-t to literal tabs. | |||
*/ | |||
switch (cp[1]) { | |||
case ('t'): | |||
cp[0] = '\t'; | |||
/* FALLTHROUGH */ | |||
case ('\\'): | |||
pairs++; | pairs++; | ||
cp++; | cp++; | ||
} else if (0 == quoted && ' ' == cp[1]) | break; | ||
case (' '): | |||
/* Skip escaped blanks. */ | /* Skip escaped blanks. */ | ||
cp++; | if (0 == quoted) | ||
cp++; | |||
break; | |||
default: | |||
break; | |||
} | |||
} else if (0 == quoted) { | } else if (0 == quoted) { | ||
if (' ' == cp[0]) { | if (' ' == cp[0]) { | ||
/* Unescaped blanks end unquoted args. */ | /* Unescaped blanks end unquoted args. */ | ||
|
|
||
} | } | ||
/* Quoted argument without a closing quote. */ | /* Quoted argument without a closing quote. */ | ||
if (1 == quoted && msg) | if (1 == quoted) | ||
(*msg)(MANDOCERR_BADQUOTE, data, ln, *pos, NULL); | mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL); | ||
/* Null-terminate this argument and move to the next one. */ | /* Null-terminate this argument and move to the next one. */ | ||
if (pairs) | if (pairs) | ||
|
|
||
while (' ' == *cp) | while (' ' == *cp) | ||
cp++; | cp++; | ||
} | } | ||
*pos += (cp - start) + (quoted ? 1 : 0); | *pos += (int)(cp - start) + (quoted ? 1 : 0); | ||
*cpp = cp; | *cpp = cp; | ||
if ('\0' == *cp && msg && (white || ' ' == cp[-1])) | if ('\0' == *cp && (white || ' ' == cp[-1])) | ||
(*msg)(MANDOCERR_EOLNSPACE, data, ln, *pos, NULL); | mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL); | ||
return(start); | return(start); | ||
} | } | ||
static int | static int | ||
a2time(time_t *t, const char *fmt, const char *p) | a2time(time_t *t, const char *fmt, const char *p) | ||
{ | { | ||
|
|
||
memset(&tm, 0, sizeof(struct tm)); | memset(&tm, 0, sizeof(struct tm)); | ||
pp = NULL; | |||
#ifdef HAVE_STRPTIME | |||
pp = strptime(p, fmt, &tm); | pp = strptime(p, fmt, &tm); | ||
#endif | |||
if (NULL != pp && '\0' == *pp) { | if (NULL != pp && '\0' == *pp) { | ||
*t = mktime(&tm); | *t = mktime(&tm); | ||
return(1); | return(1); | ||
|
|
||
return(0); | return(0); | ||
} | } | ||
static char * | static char * | ||
time2a(time_t t) | time2a(time_t t) | ||
{ | { | ||
struct tm tm; | struct tm *tm; | ||
char buf[DATESIZE]; | char *buf, *p; | ||
char *p; | size_t ssz; | ||
size_t nsz, rsz; | |||
int isz; | int isz; | ||
localtime_r(&t, &tm); | tm = localtime(&t); | ||
p = buf; | /* | ||
rsz = DATESIZE; | * Reserve space: | ||
* up to 9 characters for the month (September) + blank | |||
* up to 2 characters for the day + comma + blank | |||
* 4 characters for the year and a terminating '\0' | |||
*/ | |||
p = buf = mandoc_malloc(10 + 4 + 4 + 1); | |||
if (0 == (nsz = strftime(p, rsz, "%B ", &tm))) | if (0 == (ssz = strftime(p, 10 + 1, "%B ", tm))) | ||
return(NULL); | goto fail; | ||
p += (int)ssz; | |||
p += (int)nsz; | if (-1 == (isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday))) | ||
rsz -= nsz; | goto fail; | ||
if (-1 == (isz = snprintf(p, rsz, "%d, ", tm.tm_mday))) | |||
return(NULL); | |||
p += isz; | p += isz; | ||
rsz -= isz; | |||
return(strftime(p, rsz, "%Y", &tm) ? buf : NULL); | if (0 == strftime(p, 4 + 1, "%Y", tm)) | ||
goto fail; | |||
return(buf); | |||
fail: | |||
free(buf); | |||
return(NULL); | |||
} | } | ||
char * | char * | ||
mandoc_normdate(char *in, mandocmsg msg, void *data, int ln, int pos) | mandoc_normdate(struct mparse *parse, char *in, int ln, int pos) | ||
{ | { | ||
char *out; | char *out; | ||
time_t t; | time_t t; | ||
if (NULL == in || '\0' == *in || | if (NULL == in || '\0' == *in || | ||
0 == strcmp(in, "$" "Mdocdate$")) { | 0 == strcmp(in, "$" "Mdocdate$")) { | ||
(*msg)(MANDOCERR_NODATE, data, ln, pos, NULL); | mandoc_msg(MANDOCERR_NODATE, parse, ln, pos, NULL); | ||
time(&t); | time(&t); | ||
} | } | ||
else if (a2time(&t, "%Y-%m-%d", in)) | |||
t = 0; | |||
else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) && | else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) && | ||
!a2time(&t, "%b %d, %Y", in) && | !a2time(&t, "%b %d, %Y", in)) { | ||
!a2time(&t, "%Y-%m-%d", in)) { | mandoc_msg(MANDOCERR_BADDATE, parse, ln, pos, NULL); | ||
(*msg)(MANDOCERR_BADDATE, data, ln, pos, NULL); | |||
t = 0; | t = 0; | ||
} | } | ||
out = t ? time2a(t) : NULL; | out = t ? time2a(t) : NULL; | ||
return(mandoc_strdup(out ? out : in)); | return(out ? out : mandoc_strdup(in)); | ||
} | } | ||
int | int | ||
mandoc_eos(const char *p, size_t sz, int enclosed) | mandoc_eos(const char *p, size_t sz, int enclosed) | ||
{ | { | ||
|
|
||
/* | /* | ||
* End-of-sentence recognition must include situations where | * End-of-sentence recognition must include situations where | ||
* some symbols, such as `)', allow prior EOS punctuation to | * some symbols, such as `)', allow prior EOS punctuation to | ||
* propogate outward. | * propagate outward. | ||
*/ | */ | ||
found = 0; | found = 0; | ||
|
|
||
return(found && !enclosed); | return(found && !enclosed); | ||
} | } | ||
/* | |||
* Convert a string to a long that may not be <0. | |||
* If the string is invalid, or is less than 0, return -1. | |||
*/ | |||
int | int | ||
mandoc_hyph(const char *start, const char *c) | mandoc_strntoi(const char *p, size_t sz, int base) | ||
{ | { | ||
char buf[32]; | |||
char *ep; | |||
long v; | |||
/* | if (sz > 31) | ||
* Choose whether to break at a hyphenated character. We only | return(-1); | ||
* do this if it's free-standing within a word. | |||
*/ | |||
/* Skip first/last character of buffer. */ | memcpy(buf, p, sz); | ||
if (c == start || '\0' == *(c + 1)) | buf[(int)sz] = '\0'; | ||
return(0); | |||
/* Skip first/last character of word. */ | |||
if ('\t' == *(c + 1) || '\t' == *(c - 1)) | |||
return(0); | |||
if (' ' == *(c + 1) || ' ' == *(c - 1)) | |||
return(0); | |||
/* Skip double invocations. */ | |||
if ('-' == *(c + 1) || '-' == *(c - 1)) | |||
return(0); | |||
/* Skip escapes. */ | |||
if ('\\' == *(c - 1)) | |||
return(0); | |||
return(1); | errno = 0; | ||
v = strtol(buf, &ep, base); | |||
if (buf[0] == '\0' || *ep != '\0') | |||
return(-1); | |||
if (v > INT_MAX) | |||
v = INT_MAX; | |||
if (v < INT_MIN) | |||
v = INT_MIN; | |||
return((int)v); | |||
} | } |