Annotation of mandoc/mandoc.c, Revision 1.77
1.77 ! schwarze 1: /* $Id: mandoc.c,v 1.76 2014/03/23 11:25:26 schwarze Exp $ */
1.1 kristaps 2: /*
1.59 schwarze 3: * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.68 schwarze 4: * Copyright (c) 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
1.1 kristaps 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
1.36 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.1 kristaps 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.36 schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.1 kristaps 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
1.9 kristaps 18: #ifdef HAVE_CONFIG_H
19: #include "config.h"
1.7 kristaps 20: #endif
21:
1.2 kristaps 22: #include <sys/types.h>
23:
1.1 kristaps 24: #include <assert.h>
25: #include <ctype.h>
1.50 kristaps 26: #include <errno.h>
27: #include <limits.h>
1.1 kristaps 28: #include <stdlib.h>
1.4 kristaps 29: #include <stdio.h>
30: #include <string.h>
1.7 kristaps 31: #include <time.h>
1.1 kristaps 32:
1.18 kristaps 33: #include "mandoc.h"
1.76 schwarze 34: #include "mandoc_aux.h"
1.1 kristaps 35: #include "libmandoc.h"
36:
/*
 * NOTE(review): DATESIZE is not referenced by any code visible in
 * this part of the file -- confirm that it is still needed.
 */
#define	DATESIZE 32

/* File-local date conversion helpers; the definitions follow below. */
static	int	 a2time(time_t *t, const char *fmt, const char *p);
static	char	*time2a(time_t t);
1.7 kristaps 41:
1.45 kristaps 42:
43: enum mandoc_esc
1.74 schwarze 44: mandoc_escape(const char **end, const char **start, int *sz)
1.1 kristaps 45: {
1.65 schwarze 46: const char *local_start;
47: int local_sz;
48: char term;
1.45 kristaps 49: enum mandoc_esc gly;
50:
1.65 schwarze 51: /*
52: * When the caller doesn't provide return storage,
53: * use local storage.
54: */
55:
56: if (NULL == start)
57: start = &local_start;
58: if (NULL == sz)
59: sz = &local_sz;
60:
61: /*
62: * Beyond the backslash, at least one input character
63: * is part of the escape sequence. With one exception
64: * (see below), that character won't be returned.
65: */
66:
1.45 kristaps 67: gly = ESCAPE_ERROR;
1.65 schwarze 68: *start = ++*end;
69: *sz = 0;
1.64 schwarze 70: term = '\0';
1.18 kristaps 71:
1.65 schwarze 72: switch ((*start)[-1]) {
1.45 kristaps 73: /*
74: * First the glyphs. There are several different forms of
75: * these, but each eventually returns a substring of the glyph
76: * name.
77: */
78: case ('('):
79: gly = ESCAPE_SPECIAL;
1.65 schwarze 80: *sz = 2;
1.45 kristaps 81: break;
82: case ('['):
83: gly = ESCAPE_SPECIAL;
1.52 kristaps 84: /*
85: * Unicode escapes are defined in groff as \[uXXXX] to
86: * \[u10FFFF], where the contained value must be a valid
87: * Unicode codepoint. Here, however, only check whether
88: * it's not a zero-width escape.
89: */
1.65 schwarze 90: if ('u' == (*start)[0] && ']' != (*start)[1])
1.52 kristaps 91: gly = ESCAPE_UNICODE;
1.45 kristaps 92: term = ']';
93: break;
94: case ('C'):
1.65 schwarze 95: if ('\'' != **start)
1.45 kristaps 96: return(ESCAPE_ERROR);
1.65 schwarze 97: *start = ++*end;
1.70 schwarze 98: if ('u' == (*start)[0] && '\'' != (*start)[1])
99: gly = ESCAPE_UNICODE;
100: else
101: gly = ESCAPE_SPECIAL;
1.45 kristaps 102: term = '\'';
103: break;
1.72 schwarze 104:
105: /*
106: * Escapes taking no arguments at all.
107: */
108: case ('d'):
109: /* FALLTHROUGH */
110: case ('u'):
111: return(ESCAPE_IGNORE);
1.63 schwarze 112:
113: /*
114: * The \z escape is supposed to output the following
115: * character without advancing the cursor position.
116: * Since we are mostly dealing with terminal mode,
117: * let us just skip the next character.
118: */
119: case ('z'):
120: return(ESCAPE_SKIPCHAR);
1.1 kristaps 121:
1.45 kristaps 122: /*
123: * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
124: * 'X' is the trigger. These have opaque sub-strings.
125: */
126: case ('F'):
127: /* FALLTHROUGH */
128: case ('g'):
1.24 kristaps 129: /* FALLTHROUGH */
1.45 kristaps 130: case ('k'):
1.24 kristaps 131: /* FALLTHROUGH */
1.45 kristaps 132: case ('M'):
1.24 kristaps 133: /* FALLTHROUGH */
1.45 kristaps 134: case ('m'):
1.24 kristaps 135: /* FALLTHROUGH */
1.45 kristaps 136: case ('n'):
1.24 kristaps 137: /* FALLTHROUGH */
1.45 kristaps 138: case ('V'):
1.24 kristaps 139: /* FALLTHROUGH */
1.45 kristaps 140: case ('Y'):
1.60 schwarze 141: gly = ESCAPE_IGNORE;
1.24 kristaps 142: /* FALLTHROUGH */
1.45 kristaps 143: case ('f'):
144: if (ESCAPE_ERROR == gly)
145: gly = ESCAPE_FONT;
1.65 schwarze 146: switch (**start) {
1.45 kristaps 147: case ('('):
1.65 schwarze 148: *start = ++*end;
149: *sz = 2;
1.45 kristaps 150: break;
151: case ('['):
1.65 schwarze 152: *start = ++*end;
1.45 kristaps 153: term = ']';
154: break;
155: default:
1.65 schwarze 156: *sz = 1;
1.45 kristaps 157: break;
158: }
159: break;
160:
161: /*
162: * These escapes are of the form \X'Y', where 'X' is the trigger
163: * and 'Y' is any string. These have opaque sub-strings.
164: */
165: case ('A'):
1.24 kristaps 166: /* FALLTHROUGH */
1.45 kristaps 167: case ('b'):
1.24 kristaps 168: /* FALLTHROUGH */
1.73 schwarze 169: case ('B'):
170: /* FALLTHROUGH */
1.24 kristaps 171: case ('D'):
172: /* FALLTHROUGH */
1.45 kristaps 173: case ('o'):
1.24 kristaps 174: /* FALLTHROUGH */
1.45 kristaps 175: case ('R'):
1.24 kristaps 176: /* FALLTHROUGH */
1.73 schwarze 177: case ('w'):
178: /* FALLTHROUGH */
1.45 kristaps 179: case ('X'):
1.24 kristaps 180: /* FALLTHROUGH */
1.45 kristaps 181: case ('Z'):
1.77 ! schwarze 182: if ('\0' == **start)
1.45 kristaps 183: return(ESCAPE_ERROR);
184: gly = ESCAPE_IGNORE;
1.77 ! schwarze 185: term = **start;
1.65 schwarze 186: *start = ++*end;
1.24 kristaps 187: break;
1.45 kristaps 188:
189: /*
190: * These escapes are of the form \X'N', where 'X' is the trigger
191: * and 'N' resolves to a numerical expression.
192: */
1.28 kristaps 193: case ('h'):
194: /* FALLTHROUGH */
1.45 kristaps 195: case ('H'):
196: /* FALLTHROUGH */
197: case ('L'):
198: /* FALLTHROUGH */
199: case ('l'):
200: /* FALLTHROUGH */
201: case ('S'):
202: /* FALLTHROUGH */
1.28 kristaps 203: case ('v'):
204: /* FALLTHROUGH */
1.45 kristaps 205: case ('x'):
1.77 ! schwarze 206: if (strchr("\0 %&()*+-./0123456789:<=>", **start))
1.65 schwarze 207: return(ESCAPE_ERROR);
1.73 schwarze 208: gly = ESCAPE_IGNORE;
1.77 ! schwarze 209: term = **start;
1.65 schwarze 210: *start = ++*end;
1.45 kristaps 211: break;
1.60 schwarze 212:
213: /*
214: * Special handling for the numbered character escape.
215: * XXX Do any other escapes need similar handling?
216: */
217: case ('N'):
1.65 schwarze 218: if ('\0' == **start)
1.60 schwarze 219: return(ESCAPE_ERROR);
1.65 schwarze 220: (*end)++;
221: if (isdigit((unsigned char)**start)) {
222: *sz = 1;
1.60 schwarze 223: return(ESCAPE_IGNORE);
1.65 schwarze 224: }
225: (*start)++;
1.60 schwarze 226: while (isdigit((unsigned char)**end))
227: (*end)++;
1.65 schwarze 228: *sz = *end - *start;
1.60 schwarze 229: if ('\0' != **end)
230: (*end)++;
231: return(ESCAPE_NUMBERED);
1.45 kristaps 232:
233: /*
234: * Sizes get a special category of their own.
235: */
1.8 kristaps 236: case ('s'):
1.45 kristaps 237: gly = ESCAPE_IGNORE;
1.28 kristaps 238:
1.45 kristaps 239: /* See +/- counts as a sign. */
1.65 schwarze 240: if ('+' == **end || '-' == **end || ASCII_HYPH == **end)
241: (*end)++;
1.8 kristaps 242:
1.65 schwarze 243: switch (**end) {
1.22 kristaps 244: case ('('):
1.65 schwarze 245: *start = ++*end;
246: *sz = 2;
1.22 kristaps 247: break;
248: case ('['):
1.65 schwarze 249: *start = ++*end;
1.64 schwarze 250: term = ']';
1.22 kristaps 251: break;
252: case ('\''):
1.65 schwarze 253: *start = ++*end;
1.64 schwarze 254: term = '\'';
1.22 kristaps 255: break;
256: default:
1.65 schwarze 257: *sz = 1;
1.22 kristaps 258: break;
1.8 kristaps 259: }
260:
1.45 kristaps 261: break;
1.33 kristaps 262:
1.45 kristaps 263: /*
264: * Anything else is assumed to be a glyph.
1.65 schwarze 265: * In this case, pass back the character after the backslash.
1.45 kristaps 266: */
267: default:
268: gly = ESCAPE_SPECIAL;
1.65 schwarze 269: *start = --*end;
270: *sz = 1;
1.22 kristaps 271: break;
1.45 kristaps 272: }
273:
274: assert(ESCAPE_ERROR != gly);
275:
276: /*
1.64 schwarze 277: * Read up to the terminating character,
278: * paying attention to nested escapes.
1.45 kristaps 279: */
280:
281: if ('\0' != term) {
1.64 schwarze 282: while (**end != term) {
283: switch (**end) {
284: case ('\0'):
285: return(ESCAPE_ERROR);
286: case ('\\'):
287: (*end)++;
288: if (ESCAPE_ERROR ==
289: mandoc_escape(end, NULL, NULL))
290: return(ESCAPE_ERROR);
291: break;
292: default:
293: (*end)++;
294: break;
295: }
296: }
1.65 schwarze 297: *sz = (*end)++ - *start;
1.64 schwarze 298: } else {
1.65 schwarze 299: assert(*sz > 0);
300: if ((size_t)*sz > strlen(*start))
1.45 kristaps 301: return(ESCAPE_ERROR);
1.65 schwarze 302: *end += *sz;
1.45 kristaps 303: }
304:
305: /* Run post-processors. */
306:
307: switch (gly) {
308: case (ESCAPE_FONT):
1.68 schwarze 309: if (2 == *sz) {
310: if ('C' == **start) {
311: /*
312: * Treat constant-width font modes
313: * just like regular font modes.
314: */
315: (*start)++;
316: (*sz)--;
317: } else {
318: if ('B' == (*start)[0] && 'I' == (*start)[1])
319: gly = ESCAPE_FONTBI;
320: break;
321: }
1.65 schwarze 322: } else if (1 != *sz)
1.45 kristaps 323: break;
1.61 kristaps 324:
1.65 schwarze 325: switch (**start) {
1.45 kristaps 326: case ('3'):
327: /* FALLTHROUGH */
328: case ('B'):
329: gly = ESCAPE_FONTBOLD;
330: break;
331: case ('2'):
332: /* FALLTHROUGH */
333: case ('I'):
334: gly = ESCAPE_FONTITALIC;
1.22 kristaps 335: break;
1.45 kristaps 336: case ('P'):
337: gly = ESCAPE_FONTPREV;
1.22 kristaps 338: break;
1.45 kristaps 339: case ('1'):
340: /* FALLTHROUGH */
341: case ('R'):
342: gly = ESCAPE_FONTROMAN;
1.1 kristaps 343: break;
344: }
1.46 kristaps 345: break;
1.45 kristaps 346: case (ESCAPE_SPECIAL):
1.65 schwarze 347: if (1 == *sz && 'c' == **start)
1.45 kristaps 348: gly = ESCAPE_NOSPACE;
1.22 kristaps 349: break;
1.1 kristaps 350: default:
1.22 kristaps 351: break;
1.1 kristaps 352: }
353:
1.45 kristaps 354: return(gly);
1.36 schwarze 355: }
356:
357: /*
358: * Parse a quoted or unquoted roff-style request or macro argument.
359: * Return a pointer to the parsed argument, which is either the original
360: * pointer or advanced by one byte in case the argument is quoted.
1.71 schwarze 361: * NUL-terminate the argument in place.
1.36 schwarze 362: * Collapse pairs of quotes inside quoted arguments.
363: * Advance the argument pointer to the next argument,
1.71 schwarze 364: * or to the NUL byte terminating the argument line.
1.36 schwarze 365: */
366: char *
1.48 kristaps 367: mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)
1.36 schwarze 368: {
369: char *start, *cp;
370: int quoted, pairs, white;
371:
372: /* Quoting can only start with a new word. */
373: start = *cpp;
1.47 kristaps 374: quoted = 0;
1.36 schwarze 375: if ('"' == *start) {
376: quoted = 1;
377: start++;
1.47 kristaps 378: }
1.36 schwarze 379:
380: pairs = 0;
381: white = 0;
382: for (cp = start; '\0' != *cp; cp++) {
1.67 schwarze 383:
384: /*
385: * Move the following text left
386: * after quoted quotes and after "\\" and "\t".
387: */
1.36 schwarze 388: if (pairs)
389: cp[-pairs] = cp[0];
1.67 schwarze 390:
1.36 schwarze 391: if ('\\' == cp[0]) {
1.67 schwarze 392: /*
393: * In copy mode, translate double to single
394: * backslashes and backslash-t to literal tabs.
395: */
396: switch (cp[1]) {
397: case ('t'):
398: cp[0] = '\t';
399: /* FALLTHROUGH */
400: case ('\\'):
1.36 schwarze 401: pairs++;
402: cp++;
1.67 schwarze 403: break;
404: case (' '):
1.36 schwarze 405: /* Skip escaped blanks. */
1.67 schwarze 406: if (0 == quoted)
407: cp++;
408: break;
409: default:
410: break;
411: }
1.36 schwarze 412: } else if (0 == quoted) {
413: if (' ' == cp[0]) {
414: /* Unescaped blanks end unquoted args. */
415: white = 1;
416: break;
417: }
418: } else if ('"' == cp[0]) {
419: if ('"' == cp[1]) {
420: /* Quoted quotes collapse. */
421: pairs++;
422: cp++;
423: } else {
424: /* Unquoted quotes end quoted args. */
425: quoted = 2;
426: break;
427: }
428: }
429: }
430:
431: /* Quoted argument without a closing quote. */
1.48 kristaps 432: if (1 == quoted)
1.42 kristaps 433: mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL);
1.36 schwarze 434:
1.71 schwarze 435: /* NUL-terminate this argument and move to the next one. */
1.36 schwarze 436: if (pairs)
437: cp[-pairs] = '\0';
438: if ('\0' != *cp) {
439: *cp++ = '\0';
440: while (' ' == *cp)
441: cp++;
442: }
1.39 kristaps 443: *pos += (int)(cp - start) + (quoted ? 1 : 0);
1.36 schwarze 444: *cpp = cp;
445:
1.48 kristaps 446: if ('\0' == *cp && (white || ' ' == cp[-1]))
1.42 kristaps 447: mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL);
1.36 schwarze 448:
449: return(start);
1.4 kristaps 450: }
1.7 kristaps 451:
/*
 * Parse the string p according to the strptime(3) format fmt.
 * Return 1 and store the result of mktime(3) in *t if the format
 * matched the complete string; otherwise return 0 and leave *t alone.
 * When built without HAVE_STRPTIME, nothing is parsed and the
 * function always returns 0.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 parsed;
	char		*rest = NULL;

	memset(&parsed, 0, sizeof(parsed));

#ifdef HAVE_STRPTIME
	rest = strptime(p, fmt, &parsed);
#endif

	/* No match at all, or input left over after the match. */
	if (NULL == rest || '\0' != *rest)
		return(0);

	*t = mktime(&parsed);
	return(1);
}
471:
/*
 * Format the time t in local time as "Month day, year".
 * Return a buffer obtained from mandoc_malloc() that the caller
 * has to free, or NULL if the time cannot be converted or formatted.
 */
static char *
time2a(time_t t)
{
	struct tm	*tm;
	char		*buf, *p;
	size_t		 ssz;
	int		 isz;

	/* localtime(3) returns NULL if t cannot be broken down. */
	if (NULL == (tm = localtime(&t)))
		return(NULL);

	/*
	 * Reserve space:
	 * up to 9 characters for the month (September) + blank
	 * up to 2 characters for the day + comma + blank
	 * 4 characters for the year and a terminating '\0'
	 * NOTE(review): the result of mandoc_malloc() is used without
	 * a check; presumably it does not return NULL -- confirm.
	 */
	p = buf = mandoc_malloc(10 + 4 + 4 + 1);

	if (0 == (ssz = strftime(p, 10 + 1, "%B ", tm)))
		goto fail;
	p += (int)ssz;

	/*
	 * snprintf(3) reports the length it wanted to write; refuse
	 * both errors and truncation, such that p never advances
	 * beyond what was actually stored.
	 */
	isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday);
	if (isz < 0 || isz > 4)
		goto fail;
	p += isz;

	if (0 == strftime(p, 4 + 1, "%Y", tm))
		goto fail;
	return(buf);

fail:
	free(buf);
	return(NULL);
}
506:
507: char *
1.42 kristaps 508: mandoc_normdate(struct mparse *parse, char *in, int ln, int pos)
1.7 kristaps 509: {
1.37 schwarze 510: char *out;
1.7 kristaps 511: time_t t;
512:
1.37 schwarze 513: if (NULL == in || '\0' == *in ||
514: 0 == strcmp(in, "$" "Mdocdate$")) {
1.42 kristaps 515: mandoc_msg(MANDOCERR_NODATE, parse, ln, pos, NULL);
1.37 schwarze 516: time(&t);
517: }
1.62 schwarze 518: else if (a2time(&t, "%Y-%m-%d", in))
519: t = 0;
1.37 schwarze 520: else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) &&
1.62 schwarze 521: !a2time(&t, "%b %d, %Y", in)) {
1.42 kristaps 522: mandoc_msg(MANDOCERR_BADDATE, parse, ln, pos, NULL);
1.37 schwarze 523: t = 0;
1.7 kristaps 524: }
1.37 schwarze 525: out = t ? time2a(t) : NULL;
1.38 schwarze 526: return(out ? out : mandoc_strdup(in));
1.7 kristaps 527: }
528:
/*
 * Decide whether the sz bytes starting at p end a sentence.
 *
 * The buffer is scanned backwards from its last byte.  Closing
 * delimiters (double quote, single quote, ']' and ')') seen before
 * any end-of-sentence punctuation mark the punctuation as enclosed;
 * '.', '!' and '?' count as end-of-sentence punctuation.  The scan
 * stops at the first byte that is neither.
 *
 * Returns 1 if punctuation was found and it is either not enclosed
 * or directly preceded by an alphanumeric byte, 0 otherwise.
 */
int
mandoc_eos(const char *p, size_t sz)
{
	size_t		 i;
	int		 enclosed, found;
	unsigned char	 c;

	if (0 == sz)
		return(0);

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 */

	enclosed = found = 0;

	/*
	 * Count down with an index rather than a pointer, such that
	 * no pointer before the start of the buffer is ever formed
	 * and sz is never narrowed to an int.
	 */
	for (i = sz; i > 0; i--) {
		c = (unsigned char)p[i - 1];
		switch (c) {
		case '\"':
			/* FALLTHROUGH */
		case '\'':
			/* FALLTHROUGH */
		case ']':
			/* FALLTHROUGH */
		case ')':
			if (0 == found)
				enclosed = 1;
			break;
		case '.':
			/* FALLTHROUGH */
		case '!':
			/* FALLTHROUGH */
		case '?':
			found = 1;
			break;
		default:
			return(found && (!enclosed || isalnum(c)));
		}
	}

	/* The whole buffer consists of punctuation and delimiters. */
	return(found && !enclosed);
}
1.50 kristaps 571:
/*
 * Convert the first sz bytes of p to an int with strtol(3),
 * using the given base.
 * Return -1 if sz exceeds 31 bytes, if the input is empty, or if
 * strtol(3) does not consume all of it.
 * Otherwise, return the value, clamped to the range INT_MIN to INT_MAX.
 * Note that negative numbers are accepted, so a result of -1
 * does not necessarily indicate invalid input.
 */
int
mandoc_strntoi(const char *p, size_t sz, int base)
{
	char	 digits[32];
	char	*tail;
	long	 val;

	/* Keep one byte free for the terminating NUL. */
	if (sz >= sizeof(digits))
		return(-1);

	memcpy(digits, p, sz);
	digits[sz] = '\0';

	errno = 0;
	val = strtol(digits, &tail, base);

	/* Nothing to convert, or garbage after the number. */
	if ('\0' == digits[0] || '\0' != *tail)
		return(-1);

	if (val > INT_MAX)
		return(INT_MAX);
	if (val < INT_MIN)
		return(INT_MIN);
	return((int)val);
}
CVSweb