Return to mandoc.c CVS log | Up to [cvsweb.bsd.lv] / mandoc |
version 1.23, 2010/07/18 17:00:26 | version 1.49, 2011/04/30 10:18:24 | ||
---|---|---|---|
|
|
||
/* $Id$ */ | /* $Id$ */ | ||
/* | /* | ||
* Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> | * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> | ||
* Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> | |||
* | * | ||
* Permission to use, copy, modify, and distribute this software for any | * Permission to use, copy, modify, and distribute this software for any | ||
* purpose with or without fee is hereby granted, provided that the above | * purpose with or without fee is hereby granted, provided that the above | ||
* copyright notice and this permission notice appear in all copies. | * copyright notice and this permission notice appear in all copies. | ||
* | * | ||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES | ||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR | ||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
|
|
||
#include "mandoc.h" | #include "mandoc.h" | ||
#include "libmandoc.h" | #include "libmandoc.h" | ||
#define DATESIZE 32 | |||
static int a2time(time_t *, const char *, const char *); | static int a2time(time_t *, const char *, const char *); | ||
static char *time2a(time_t); | |||
static int numescape(const char *); | |||
/* | |||
* Pass over recursive numerical expressions. The context of this | |||
* function is important: it's only called within character-terminating | |||
* escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial | |||
* recursion: we don't care about what's in these blocks. | |||
* This returns the number of characters skipped or -1 if an error | |||
* occurs (the caller should bail). | |||
*/ | |||
static int | |||
numescape(const char *start) | |||
{ | |||
int i; | |||
size_t sz; | |||
const char *cp; | |||
int | i = 0; | ||
mandoc_special(char *p) | |||
/* The expression consists of a subexpression. */ | |||
if ('\\' == start[i]) { | |||
cp = &start[++i]; | |||
/* | |||
* Read past the end of the subexpression. | |||
* Bail immediately on errors. | |||
*/ | |||
if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL)) | |||
return(-1); | |||
return(i + cp - &start[i]); | |||
} | |||
if ('(' != start[i++]) | |||
return(0); | |||
/* | |||
* A parenthesised subexpression. Read until the closing | |||
* parenthesis, making sure to handle any nested subexpressions | |||
* that might ruin our parse. | |||
*/ | |||
while (')' != start[i]) { | |||
sz = strcspn(&start[i], ")\\"); | |||
i += (int)sz; | |||
if ('\0' == start[i]) | |||
return(-1); | |||
else if ('\\' != start[i]) | |||
continue; | |||
cp = &start[++i]; | |||
if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL)) | |||
return(-1); | |||
i += cp - &start[i]; | |||
} | |||
/* Read past the terminating ')'. */ | |||
return(++i); | |||
} | |||
enum mandoc_esc | |||
mandoc_escape(const char **end, const char **start, int *sz) | |||
{ | { | ||
int len, i; | char c, term, numeric; | ||
char term; | int i, lim, ssz, rlim; | ||
char *sv; | const char *cp, *rstart; | ||
enum mandoc_esc gly; | |||
len = 0; | |||
term = '\0'; | |||
sv = p; | |||
assert('\\' == *p); | cp = *end; | ||
p++; | rstart = cp; | ||
if (start) | |||
*start = rstart; | |||
i = lim = 0; | |||
gly = ESCAPE_ERROR; | |||
term = numeric = '\0'; | |||
switch (*p++) { | switch ((c = cp[i++])) { | ||
case ('s'): | /* | ||
if (ASCII_HYPH == *p) | * First the glyphs. There are several different forms of | ||
*p = '-'; | * these, but each eventually returns a substring of the glyph | ||
if ('+' == *p || '-' == *p) | * name. | ||
p++; | */ | ||
case ('('): | |||
gly = ESCAPE_SPECIAL; | |||
lim = 2; | |||
break; | |||
case ('['): | |||
gly = ESCAPE_SPECIAL; | |||
term = ']'; | |||
break; | |||
case ('C'): | |||
if ('\'' != cp[i]) | |||
return(ESCAPE_ERROR); | |||
gly = ESCAPE_SPECIAL; | |||
term = '\''; | |||
break; | |||
i = ('s' != *(p - 1)); | /* | ||
* Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where | |||
* 'X' is the trigger. These have opaque sub-strings. | |||
*/ | |||
case ('F'): | |||
/* FALLTHROUGH */ | |||
case ('g'): | |||
/* FALLTHROUGH */ | |||
case ('k'): | |||
/* FALLTHROUGH */ | |||
case ('M'): | |||
/* FALLTHROUGH */ | |||
case ('m'): | |||
/* FALLTHROUGH */ | |||
case ('n'): | |||
/* FALLTHROUGH */ | |||
case ('V'): | |||
/* FALLTHROUGH */ | |||
case ('Y'): | |||
if (ESCAPE_ERROR == gly) | |||
gly = ESCAPE_IGNORE; | |||
/* FALLTHROUGH */ | |||
case ('*'): | |||
if (ESCAPE_ERROR == gly) | |||
gly = ESCAPE_PREDEF; | |||
/* FALLTHROUGH */ | |||
case ('f'): | |||
if (ESCAPE_ERROR == gly) | |||
gly = ESCAPE_FONT; | |||
switch (*p++) { | rstart= &cp[i]; | ||
if (start) | |||
*start = rstart; | |||
switch (cp[i++]) { | |||
case ('('): | case ('('): | ||
len = 2; | lim = 2; | ||
break; | break; | ||
case ('['): | case ('['): | ||
term = ']'; | term = ']'; | ||
break; | break; | ||
case ('\''): | |||
term = '\''; | |||
break; | |||
default: | default: | ||
len = 1; | lim = 1; | ||
p--; | i--; | ||
break; | break; | ||
} | } | ||
break; | |||
if (ASCII_HYPH == *p) | /* | ||
*p = '-'; | * These escapes are of the form \X'Y', where 'X' is the trigger | ||
if ('+' == *p || '-' == *p) { | * and 'Y' is any string. These have opaque sub-strings. | ||
if (i++) | */ | ||
return(0); | case ('A'): | ||
p++; | /* FALLTHROUGH */ | ||
} | case ('b'): | ||
/* FALLTHROUGH */ | |||
if (0 == i) | case ('D'): | ||
return(0); | /* FALLTHROUGH */ | ||
case ('o'): | |||
/* FALLTHROUGH */ | |||
case ('R'): | |||
/* FALLTHROUGH */ | |||
case ('X'): | |||
/* FALLTHROUGH */ | |||
case ('Z'): | |||
if ('\'' != cp[i++]) | |||
return(ESCAPE_ERROR); | |||
gly = ESCAPE_IGNORE; | |||
term = '\''; | |||
break; | break; | ||
case ('f'): | |||
/* | |||
* These escapes are of the form \X'N', where 'X' is the trigger | |||
* and 'N' resolves to a numerical expression. | |||
*/ | |||
case ('B'): | |||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('F'): | case ('h'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case ('*'): | case ('H'): | ||
switch (*p++) { | /* FALLTHROUGH */ | ||
case ('L'): | |||
/* FALLTHROUGH */ | |||
case ('l'): | |||
/* FALLTHROUGH */ | |||
case ('N'): | |||
if (ESCAPE_ERROR == gly) | |||
gly = ESCAPE_NUMBERED; | |||
/* FALLTHROUGH */ | |||
case ('S'): | |||
/* FALLTHROUGH */ | |||
case ('v'): | |||
/* FALLTHROUGH */ | |||
case ('w'): | |||
/* FALLTHROUGH */ | |||
case ('x'): | |||
if (ESCAPE_ERROR == gly) | |||
gly = ESCAPE_IGNORE; | |||
if ('\'' != cp[i++]) | |||
return(ESCAPE_ERROR); | |||
term = numeric = '\''; | |||
break; | |||
/* | |||
* Sizes get a special category of their own. | |||
*/ | |||
case ('s'): | |||
gly = ESCAPE_IGNORE; | |||
rstart = &cp[i]; | |||
if (start) | |||
*start = rstart; | |||
/* See +/- counts as a sign. */ | |||
c = cp[i]; | |||
if ('+' == c || '-' == c || ASCII_HYPH == c) | |||
++i; | |||
switch (cp[i++]) { | |||
case ('('): | case ('('): | ||
len = 2; | lim = 2; | ||
break; | break; | ||
case ('['): | case ('['): | ||
term = ']'; | term = numeric = ']'; | ||
break; | break; | ||
case ('\''): | |||
term = numeric = '\''; | |||
break; | |||
default: | default: | ||
len = 1; | lim = 1; | ||
p--; | i--; | ||
break; | break; | ||
} | } | ||
/* See +/- counts as a sign. */ | |||
c = cp[i]; | |||
if ('+' == c || '-' == c || ASCII_HYPH == c) | |||
++i; | |||
break; | break; | ||
case ('('): | |||
len = 2; | /* | ||
break; | * Anything else is assumed to be a glyph. | ||
case ('['): | */ | ||
term = ']'; | |||
break; | |||
default: | default: | ||
len = 1; | gly = ESCAPE_SPECIAL; | ||
p--; | lim = 1; | ||
i--; | |||
break; | break; | ||
} | } | ||
if (term) { | assert(ESCAPE_ERROR != gly); | ||
for ( ; *p && term != *p; p++) | |||
if (ASCII_HYPH == *p) | rstart = &cp[i]; | ||
*p = '-'; | if (start) | ||
return(*p ? p - sv : 0); | *start = rstart; | ||
/* | |||
* If a terminating block has been specified, we need to | |||
* handle the case of recursion, where nested escapes could have their | |||
* own terminating blocks that mess up our parse. This, by the | |||
* way, means that the "start" and "size" values will be | |||
* effectively meaningless. | |||
*/ | |||
ssz = 0; | |||
if (numeric && -1 == (ssz = numescape(&cp[i]))) | |||
return(ESCAPE_ERROR); | |||
i += ssz; | |||
rlim = -1; | |||
/* | |||
* We have a character terminator. Try to read up to that | |||
* character. If we can't (i.e., we hit the nil), then return | |||
* an error; if we can, calculate our length, read past the | |||
* terminating character, and exit. | |||
*/ | |||
if ('\0' != term) { | |||
*end = strchr(&cp[i], term); | |||
if ('\0' == *end) | |||
return(ESCAPE_ERROR); | |||
rlim = *end - &cp[i]; | |||
if (sz) | |||
*sz = rlim; | |||
(*end)++; | |||
goto out; | |||
} | } | ||
for (i = 0; *p && i < len; i++, p++) | assert(lim > 0); | ||
if (ASCII_HYPH == *p) | |||
*p = '-'; | |||
return(i == len ? p - sv : 0); | |||
} | |||
/* | |||
* We have a numeric limit. If the string is shorter than that, | |||
* stop and return an error. Else adjust our endpoint, length, | |||
* and return the current glyph. | |||
*/ | |||
if ((size_t)lim > strlen(&cp[i])) | |||
return(ESCAPE_ERROR); | |||
rlim = lim; | |||
if (sz) | |||
*sz = rlim; | |||
*end = &cp[i] + lim; | |||
out: | |||
assert(rlim >= 0 && rstart); | |||
/* Run post-processors. */ | |||
switch (gly) { | |||
case (ESCAPE_FONT): | |||
if (1 != rlim) | |||
break; | |||
switch (*rstart) { | |||
case ('3'): | |||
/* FALLTHROUGH */ | |||
case ('B'): | |||
gly = ESCAPE_FONTBOLD; | |||
break; | |||
case ('2'): | |||
/* FALLTHROUGH */ | |||
case ('I'): | |||
gly = ESCAPE_FONTITALIC; | |||
break; | |||
case ('P'): | |||
gly = ESCAPE_FONTPREV; | |||
break; | |||
case ('1'): | |||
/* FALLTHROUGH */ | |||
case ('R'): | |||
gly = ESCAPE_FONTROMAN; | |||
break; | |||
} | |||
break; | |||
case (ESCAPE_SPECIAL): | |||
if (1 != rlim) | |||
break; | |||
if ('c' == *rstart) | |||
gly = ESCAPE_NOSPACE; | |||
break; | |||
default: | |||
break; | |||
} | |||
return(gly); | |||
} | |||
void * | void * | ||
mandoc_calloc(size_t num, size_t size) | mandoc_calloc(size_t num, size_t size) | ||
{ | { | ||
|
|
||
ptr = calloc(num, size); | ptr = calloc(num, size); | ||
if (NULL == ptr) { | if (NULL == ptr) { | ||
perror(NULL); | perror(NULL); | ||
exit(EXIT_FAILURE); | exit((int)MANDOCLEVEL_SYSERR); | ||
} | } | ||
return(ptr); | return(ptr); | ||
|
|
||
ptr = malloc(size); | ptr = malloc(size); | ||
if (NULL == ptr) { | if (NULL == ptr) { | ||
perror(NULL); | perror(NULL); | ||
exit(EXIT_FAILURE); | exit((int)MANDOCLEVEL_SYSERR); | ||
} | } | ||
return(ptr); | return(ptr); | ||
|
|
||
ptr = realloc(ptr, size); | ptr = realloc(ptr, size); | ||
if (NULL == ptr) { | if (NULL == ptr) { | ||
perror(NULL); | perror(NULL); | ||
exit(EXIT_FAILURE); | exit((int)MANDOCLEVEL_SYSERR); | ||
} | } | ||
return(ptr); | return(ptr); | ||
|
|
||
p = strdup(ptr); | p = strdup(ptr); | ||
if (NULL == p) { | if (NULL == p) { | ||
perror(NULL); | perror(NULL); | ||
exit(EXIT_FAILURE); | exit((int)MANDOCLEVEL_SYSERR); | ||
} | } | ||
return(p); | return(p); | ||
} | } | ||
/* | |||
* Parse a quoted or unquoted roff-style request or macro argument. | |||
* Return a pointer to the parsed argument, which is either the original | |||
* pointer or advanced by one byte in case the argument is quoted. | |||
* Null-terminate the argument in place. | |||
* Collapse pairs of quotes inside quoted arguments. | |||
* Advance the argument pointer to the next argument, | |||
* or to the null byte terminating the argument line. | |||
*/ | |||
char * | |||
mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos) | |||
{ | |||
char *start, *cp; | |||
int quoted, pairs, white; | |||
/* Quoting can only start with a new word. */ | |||
start = *cpp; | |||
quoted = 0; | |||
if ('"' == *start) { | |||
quoted = 1; | |||
start++; | |||
} | |||
pairs = 0; | |||
white = 0; | |||
for (cp = start; '\0' != *cp; cp++) { | |||
/* Move left after quoted quotes and escaped backslashes. */ | |||
if (pairs) | |||
cp[-pairs] = cp[0]; | |||
if ('\\' == cp[0]) { | |||
if ('\\' == cp[1]) { | |||
/* Poor man's copy mode. */ | |||
pairs++; | |||
cp++; | |||
} else if (0 == quoted && ' ' == cp[1]) | |||
/* Skip escaped blanks. */ | |||
cp++; | |||
} else if (0 == quoted) { | |||
if (' ' == cp[0]) { | |||
/* Unescaped blanks end unquoted args. */ | |||
white = 1; | |||
break; | |||
} | |||
} else if ('"' == cp[0]) { | |||
if ('"' == cp[1]) { | |||
/* Quoted quotes collapse. */ | |||
pairs++; | |||
cp++; | |||
} else { | |||
/* Unquoted quotes end quoted args. */ | |||
quoted = 2; | |||
break; | |||
} | |||
} | |||
} | |||
/* Quoted argument without a closing quote. */ | |||
if (1 == quoted) | |||
mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL); | |||
/* Null-terminate this argument and move to the next one. */ | |||
if (pairs) | |||
cp[-pairs] = '\0'; | |||
if ('\0' != *cp) { | |||
*cp++ = '\0'; | |||
while (' ' == *cp) | |||
cp++; | |||
} | |||
*pos += (int)(cp - start) + (quoted ? 1 : 0); | |||
*cpp = cp; | |||
if ('\0' == *cp && (white || ' ' == cp[-1])) | |||
mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL); | |||
return(start); | |||
} | |||
static int | static int | ||
a2time(time_t *t, const char *fmt, const char *p) | a2time(time_t *t, const char *fmt, const char *p) | ||
{ | { | ||
|
|
||
return(0); | return(0); | ||
} | } | ||
static char * | |||
/* | time2a(time_t t) | ||
* Convert from a manual date string (see mdoc(7) and man(7)) into a | |||
* date according to the stipulated date type. | |||
*/ | |||
time_t | |||
mandoc_a2time(int flags, const char *p) | |||
{ | { | ||
time_t t; | struct tm tm; | ||
char *buf, *p; | |||
size_t ssz; | |||
int isz; | |||
if (MTIME_MDOCDATE & flags) { | localtime_r(&t, &tm); | ||
if (0 == strcmp(p, "$" "Mdocdate$")) | |||
return(time(NULL)); | |||
if (a2time(&t, "$" "Mdocdate: %b %d %Y $", p)) | |||
return(t); | |||
} | |||
if (MTIME_CANONICAL & flags || MTIME_REDUCED & flags) | /* | ||
if (a2time(&t, "%b %d, %Y", p)) | * Reserve space: | ||
return(t); | * up to 9 characters for the month (September) + blank | ||
* up to 2 characters for the day + comma + blank | |||
* 4 characters for the year and a terminating '\0' | |||
*/ | |||
p = buf = mandoc_malloc(10 + 4 + 4 + 1); | |||
if (MTIME_ISO_8601 & flags) | if (0 == (ssz = strftime(p, 10 + 1, "%B ", &tm))) | ||
if (a2time(&t, "%Y-%m-%d", p)) | goto fail; | ||
return(t); | p += (int)ssz; | ||
if (MTIME_REDUCED & flags) { | if (-1 == (isz = snprintf(p, 4 + 1, "%d, ", tm.tm_mday))) | ||
if (a2time(&t, "%d, %Y", p)) | goto fail; | ||
return(t); | p += isz; | ||
if (a2time(&t, "%Y", p)) | |||
return(t); | |||
} | |||
return(0); | if (0 == strftime(p, 4 + 1, "%Y", &tm)) | ||
goto fail; | |||
return(buf); | |||
fail: | |||
free(buf); | |||
return(NULL); | |||
} | } | ||
char * | |||
mandoc_normdate(struct mparse *parse, char *in, int ln, int pos) | |||
{ | |||
char *out; | |||
time_t t; | |||
if (NULL == in || '\0' == *in || | |||
0 == strcmp(in, "$" "Mdocdate$")) { | |||
mandoc_msg(MANDOCERR_NODATE, parse, ln, pos, NULL); | |||
time(&t); | |||
} | |||
else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) && | |||
!a2time(&t, "%b %d, %Y", in) && | |||
!a2time(&t, "%Y-%m-%d", in)) { | |||
mandoc_msg(MANDOCERR_BADDATE, parse, ln, pos, NULL); | |||
t = 0; | |||
} | |||
out = t ? time2a(t) : NULL; | |||
return(out ? out : mandoc_strdup(in)); | |||
} | |||
int | int | ||
mandoc_eos(const char *p, size_t sz, int enclosed) | mandoc_eos(const char *p, size_t sz, int enclosed) | ||
{ | { | ||
|
|
||
/* | /* | ||
* End-of-sentence recognition must include situations where | * End-of-sentence recognition must include situations where | ||
* some symbols, such as `)', allow prior EOS punctuation to | * some symbols, such as `)', allow prior EOS punctuation to | ||
* propogate outward. | * propagate outward. | ||
*/ | */ | ||
found = 0; | found = 0; | ||
for (q = p + sz - 1; q >= p; q--) { | for (q = p + (int)sz - 1; q >= p; q--) { | ||
switch (*q) { | switch (*q) { | ||
case ('\"'): | case ('\"'): | ||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
|
|
||
found = 1; | found = 1; | ||
break; | break; | ||
default: | default: | ||
return(found && (!enclosed || isalnum(*q))); | return(found && (!enclosed || isalnum((unsigned char)*q))); | ||
} | } | ||
} | } | ||
return(found && !enclosed); | return(found && !enclosed); | ||
} | } | ||
int | int | ||
mandoc_hyph(const char *start, const char *c) | mandoc_hyph(const char *start, const char *c) | ||
{ | { | ||
|
|
||
if ('\\' == *(c - 1)) | if ('\\' == *(c - 1)) | ||
return(0); | return(0); | ||
return(1); | |||
} | |||
/* | |||
* Find out whether a line is a macro line or not. If it is, adjust the | |||
* current position and return one; if it isn't, return zero and don't | |||
* change the current position. | |||
*/ | |||
int | |||
mandoc_getcontrol(const char *cp, int *ppos) | |||
{ | |||
int pos; | |||
pos = *ppos; | |||
if ('\\' == cp[pos] && '.' == cp[pos + 1]) | |||
pos += 2; | |||
else if ('.' == cp[pos] || '\'' == cp[pos]) | |||
pos++; | |||
else | |||
return(0); | |||
while (' ' == cp[pos] || '\t' == cp[pos]) | |||
pos++; | |||
*ppos = pos; | |||
return(1); | return(1); | ||
} | } |