=================================================================== RCS file: /cvs/mandoc/mandoc.c,v retrieving revision 1.48 retrieving revision 1.62 diff -u -p -r1.48 -r1.62 --- mandoc/mandoc.c 2011/04/19 16:38:48 1.48 +++ mandoc/mandoc.c 2011/12/03 16:08:51 1.62 @@ -1,6 +1,6 @@ -/* $Id: mandoc.c,v 1.48 2011/04/19 16:38:48 kristaps Exp $ */ +/* $Id: mandoc.c,v 1.62 2011/12/03 16:08:51 schwarze Exp $ */ /* - * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2011 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any @@ -23,6 +23,8 @@ #include #include +#include +#include #include #include #include @@ -123,6 +125,14 @@ mandoc_escape(const char **end, const char **start, in break; case ('['): gly = ESCAPE_SPECIAL; + /* + * Unicode escapes are defined in groff as \[uXXXX] to + * \[u10FFFF], where the contained value must be a valid + * Unicode codepoint. Here, however, only check whether + * it's not a zero-width escape. + */ + if ('u' == cp[i] && ']' != cp[i + 1]) + gly = ESCAPE_UNICODE; term = ']'; break; case ('C'): @@ -151,13 +161,8 @@ mandoc_escape(const char **end, const char **start, in case ('V'): /* FALLTHROUGH */ case ('Y'): - if (ESCAPE_ERROR == gly) - gly = ESCAPE_IGNORE; + gly = ESCAPE_IGNORE; /* FALLTHROUGH */ - case ('*'): - if (ESCAPE_ERROR == gly) - gly = ESCAPE_PREDEF; - /* FALLTHROUGH */ case ('f'): if (ESCAPE_ERROR == gly) gly = ESCAPE_FONT; @@ -216,11 +221,8 @@ mandoc_escape(const char **end, const char **start, in case ('L'): /* FALLTHROUGH */ case ('l'): + gly = ESCAPE_NUMBERED; /* FALLTHROUGH */ - case ('N'): - if (ESCAPE_ERROR == gly) - gly = ESCAPE_NUMBERED; - /* FALLTHROUGH */ case ('S'): /* FALLTHROUGH */ case ('v'): @@ -235,6 +237,26 @@ mandoc_escape(const char **end, const char **start, in term = numeric = '\''; break; + /* + * Special handling for the numbered character escape. + * XXX Do any other escapes need similar handling? + */ + case ('N'): + if ('\0' == cp[i]) + return(ESCAPE_ERROR); + *end = &cp[++i]; + if (isdigit((unsigned char)cp[i-1])) + return(ESCAPE_IGNORE); + while (isdigit((unsigned char)**end)) + (*end)++; + if (start) + *start = &cp[i]; + if (sz) + *sz = *end - &cp[i]; + if ('\0' != **end) + (*end)++; + return(ESCAPE_NUMBERED); + /* * Sizes get a special category of their own. */ @@ -347,8 +369,15 @@ out: switch (gly) { case (ESCAPE_FONT): - if (1 != rlim) + /* + * Pretend that the constant-width font modes are the + * same as the regular font modes. + */ + if (2 == rlim && 'C' == *rstart) + rstart++; + else if (1 != rlim) break; + switch (*rstart) { case ('3'): /* FALLTHROUGH */ @@ -426,7 +455,17 @@ mandoc_realloc(void *ptr, size_t size) return(ptr); } +char * +mandoc_strndup(const char *ptr, size_t sz) +{ + char *p; + p = mandoc_malloc(sz + 1); + memcpy(p, ptr, sz); + p[(int)sz] = '\0'; + return(p); +} + char * mandoc_strdup(const char *ptr) { @@ -526,7 +565,10 @@ a2time(time_t *t, const char *fmt, const char *p) memset(&tm, 0, sizeof(struct tm)); + pp = NULL; +#ifdef HAVE_STRPTIME pp = strptime(p, fmt, &tm); +#endif if (NULL != pp && '\0' == *pp) { *t = mktime(&tm); return(1); @@ -538,12 +580,12 @@ a2time(time_t *t, const char *fmt, const char *p) static char * time2a(time_t t) { - struct tm tm; + struct tm *tm; char *buf, *p; size_t ssz; int isz; - localtime_r(&t, &tm); + tm = localtime(&t); /* * Reserve space: @@ -553,15 +595,15 @@ time2a(time_t t) */ p = buf = mandoc_malloc(10 + 4 + 4 + 1); - if (0 == (ssz = strftime(p, 10 + 1, "%B ", &tm))) + if (0 == (ssz = strftime(p, 10 + 1, "%B ", tm))) goto fail; p += (int)ssz; - if (-1 == (isz = snprintf(p, 4 + 1, "%d, ", tm.tm_mday))) + if (-1 == (isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday))) goto fail; p += isz; - if (0 == strftime(p, 4 + 1, "%Y", &tm)) + if (0 == strftime(p, 4 + 1, "%Y", tm)) goto fail; return(buf); @@ -581,9 +623,10 @@ mandoc_normdate(struct mparse *parse, char *in, int ln mandoc_msg(MANDOCERR_NODATE, parse, ln, pos, NULL); time(&t); } + else if (a2time(&t, "%Y-%m-%d", in)) + t = 0; else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) && - !a2time(&t, "%b %d, %Y", in) && - !a2time(&t, "%Y-%m-%d", in)) { + !a2time(&t, "%b %d, %Y", in)) { mandoc_msg(MANDOCERR_BADDATE, parse, ln, pos, NULL); t = 0; } @@ -603,7 +646,7 @@ mandoc_eos(const char *p, size_t sz, int enclosed) /* * End-of-sentence recognition must include situations where * some symbols, such as `)', allow prior EOS punctuation to - * propogate outward. + * propagate outward. */ found = 0; @@ -634,33 +677,6 @@ mandoc_eos(const char *p, size_t sz, int enclosed) return(found && !enclosed); } -int -mandoc_hyph(const char *start, const char *c) -{ - - /* - * Choose whether to break at a hyphenated character. We only - * do this if it's free-standing within a word. - */ - - /* Skip first/last character of buffer. */ - if (c == start || '\0' == *(c + 1)) - return(0); - /* Skip first/last character of word. */ - if ('\t' == *(c + 1) || '\t' == *(c - 1)) - return(0); - if (' ' == *(c + 1) || ' ' == *(c - 1)) - return(0); - /* Skip double invocations. */ - if ('-' == *(c + 1) || '-' == *(c - 1)) - return(0); - /* Skip escapes. */ - if ('\\' == *(c - 1)) - return(0); - - return(1); -} - /* * Find out whether a line is a macro line or not. If it is, adjust the * current position and return one; if it isn't, return zero and don't @@ -685,4 +701,35 @@ mandoc_getcontrol(const char *cp, int *ppos) *ppos = pos; return(1); +} + +/* + * Convert a string to a long that may not be <0. + * If the string is invalid, or is less than 0, return -1. + */ +int +mandoc_strntoi(const char *p, size_t sz, int base) +{ + char buf[32]; + char *ep; + long v; + + if (sz > 31) + return(-1); + + memcpy(buf, p, sz); + buf[(int)sz] = '\0'; + + errno = 0; + v = strtol(buf, &ep, base); + + if (buf[0] == '\0' || *ep != '\0') + return(-1); + + if (v > INT_MAX) + v = INT_MAX; + if (v < INT_MIN) + v = INT_MIN; + + return((int)v); }