Annotation of mandoc/mandoc.c, Revision 1.71
1.71 ! schwarze 1: /* $Id: mandoc.c,v 1.70 2013/11/10 21:34:04 schwarze Exp $ */
1.1 kristaps 2: /*
1.59 schwarze 3: * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.68 schwarze 4: * Copyright (c) 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
1.1 kristaps 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
1.36 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.1 kristaps 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.36 schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.1 kristaps 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
1.9 kristaps 18: #ifdef HAVE_CONFIG_H
19: #include "config.h"
1.7 kristaps 20: #endif
21:
1.2 kristaps 22: #include <sys/types.h>
23:
1.1 kristaps 24: #include <assert.h>
25: #include <ctype.h>
1.50 kristaps 26: #include <errno.h>
27: #include <limits.h>
1.1 kristaps 28: #include <stdlib.h>
1.4 kristaps 29: #include <stdio.h>
30: #include <string.h>
1.7 kristaps 31: #include <time.h>
1.1 kristaps 32:
1.18 kristaps 33: #include "mandoc.h"
1.1 kristaps 34: #include "libmandoc.h"
35:
1.37 schwarze 36: #define DATESIZE 32
37:
1.18 kristaps 38: static int a2time(time_t *, const char *, const char *);
1.37 schwarze 39: static char *time2a(time_t);
1.7 kristaps 40:
1.45 kristaps 41:
42: enum mandoc_esc
1.69 schwarze 43: mandoc_escape(const char const **end, const char const **start, int *sz)
1.1 kristaps 44: {
1.65 schwarze 45: const char *local_start;
46: int local_sz;
47: char term;
1.45 kristaps 48: enum mandoc_esc gly;
49:
1.65 schwarze 50: /*
51: * When the caller doesn't provide return storage,
52: * use local storage.
53: */
54:
55: if (NULL == start)
56: start = &local_start;
57: if (NULL == sz)
58: sz = &local_sz;
59:
60: /*
61: * Beyond the backslash, at least one input character
62: * is part of the escape sequence. With one exception
63: * (see below), that character won't be returned.
64: */
65:
1.45 kristaps 66: gly = ESCAPE_ERROR;
1.65 schwarze 67: *start = ++*end;
68: *sz = 0;
1.64 schwarze 69: term = '\0';
1.18 kristaps 70:
1.65 schwarze 71: switch ((*start)[-1]) {
1.45 kristaps 72: /*
73: * First the glyphs. There are several different forms of
74: * these, but each eventually returns a substring of the glyph
75: * name.
76: */
77: case ('('):
78: gly = ESCAPE_SPECIAL;
1.65 schwarze 79: *sz = 2;
1.45 kristaps 80: break;
81: case ('['):
82: gly = ESCAPE_SPECIAL;
1.52 kristaps 83: /*
84: * Unicode escapes are defined in groff as \[uXXXX] to
85: * \[u10FFFF], where the contained value must be a valid
86: * Unicode codepoint. Here, however, only check whether
87: * it's not a zero-width escape.
88: */
1.65 schwarze 89: if ('u' == (*start)[0] && ']' != (*start)[1])
1.52 kristaps 90: gly = ESCAPE_UNICODE;
1.45 kristaps 91: term = ']';
92: break;
93: case ('C'):
1.65 schwarze 94: if ('\'' != **start)
1.45 kristaps 95: return(ESCAPE_ERROR);
1.65 schwarze 96: *start = ++*end;
1.70 schwarze 97: if ('u' == (*start)[0] && '\'' != (*start)[1])
98: gly = ESCAPE_UNICODE;
99: else
100: gly = ESCAPE_SPECIAL;
1.45 kristaps 101: term = '\'';
102: break;
1.63 schwarze 103:
104: /*
105: * The \z escape is supposed to output the following
106: * character without advancing the cursor position.
107: * Since we are mostly dealing with terminal mode,
108: * let us just skip the next character.
109: */
110: case ('z'):
111: return(ESCAPE_SKIPCHAR);
1.1 kristaps 112:
1.45 kristaps 113: /*
114: * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
115: * 'X' is the trigger. These have opaque sub-strings.
116: */
117: case ('F'):
118: /* FALLTHROUGH */
119: case ('g'):
1.24 kristaps 120: /* FALLTHROUGH */
1.45 kristaps 121: case ('k'):
1.24 kristaps 122: /* FALLTHROUGH */
1.45 kristaps 123: case ('M'):
1.24 kristaps 124: /* FALLTHROUGH */
1.45 kristaps 125: case ('m'):
1.24 kristaps 126: /* FALLTHROUGH */
1.45 kristaps 127: case ('n'):
1.24 kristaps 128: /* FALLTHROUGH */
1.45 kristaps 129: case ('V'):
1.24 kristaps 130: /* FALLTHROUGH */
1.45 kristaps 131: case ('Y'):
1.60 schwarze 132: gly = ESCAPE_IGNORE;
1.24 kristaps 133: /* FALLTHROUGH */
1.45 kristaps 134: case ('f'):
135: if (ESCAPE_ERROR == gly)
136: gly = ESCAPE_FONT;
1.65 schwarze 137: switch (**start) {
1.45 kristaps 138: case ('('):
1.65 schwarze 139: *start = ++*end;
140: *sz = 2;
1.45 kristaps 141: break;
142: case ('['):
1.65 schwarze 143: *start = ++*end;
1.45 kristaps 144: term = ']';
145: break;
146: default:
1.65 schwarze 147: *sz = 1;
1.45 kristaps 148: break;
149: }
150: break;
151:
152: /*
153: * These escapes are of the form \X'Y', where 'X' is the trigger
154: * and 'Y' is any string. These have opaque sub-strings.
155: */
156: case ('A'):
1.24 kristaps 157: /* FALLTHROUGH */
1.45 kristaps 158: case ('b'):
1.24 kristaps 159: /* FALLTHROUGH */
160: case ('D'):
161: /* FALLTHROUGH */
1.45 kristaps 162: case ('o'):
1.24 kristaps 163: /* FALLTHROUGH */
1.45 kristaps 164: case ('R'):
1.24 kristaps 165: /* FALLTHROUGH */
1.45 kristaps 166: case ('X'):
1.24 kristaps 167: /* FALLTHROUGH */
1.45 kristaps 168: case ('Z'):
1.65 schwarze 169: if ('\'' != **start)
1.45 kristaps 170: return(ESCAPE_ERROR);
171: gly = ESCAPE_IGNORE;
1.65 schwarze 172: *start = ++*end;
1.24 kristaps 173: term = '\'';
174: break;
1.45 kristaps 175:
176: /*
177: * These escapes are of the form \X'N', where 'X' is the trigger
178: * and 'N' resolves to a numerical expression.
179: */
180: case ('B'):
181: /* FALLTHROUGH */
1.28 kristaps 182: case ('h'):
183: /* FALLTHROUGH */
1.45 kristaps 184: case ('H'):
185: /* FALLTHROUGH */
186: case ('L'):
187: /* FALLTHROUGH */
188: case ('l'):
1.60 schwarze 189: gly = ESCAPE_NUMBERED;
1.45 kristaps 190: /* FALLTHROUGH */
191: case ('S'):
192: /* FALLTHROUGH */
1.28 kristaps 193: case ('v'):
194: /* FALLTHROUGH */
1.45 kristaps 195: case ('w'):
196: /* FALLTHROUGH */
197: case ('x'):
1.65 schwarze 198: if ('\'' != **start)
199: return(ESCAPE_ERROR);
1.45 kristaps 200: if (ESCAPE_ERROR == gly)
201: gly = ESCAPE_IGNORE;
1.65 schwarze 202: *start = ++*end;
1.64 schwarze 203: term = '\'';
1.45 kristaps 204: break;
1.60 schwarze 205:
206: /*
207: * Special handling for the numbered character escape.
208: * XXX Do any other escapes need similar handling?
209: */
210: case ('N'):
1.65 schwarze 211: if ('\0' == **start)
1.60 schwarze 212: return(ESCAPE_ERROR);
1.65 schwarze 213: (*end)++;
214: if (isdigit((unsigned char)**start)) {
215: *sz = 1;
1.60 schwarze 216: return(ESCAPE_IGNORE);
1.65 schwarze 217: }
218: (*start)++;
1.60 schwarze 219: while (isdigit((unsigned char)**end))
220: (*end)++;
1.65 schwarze 221: *sz = *end - *start;
1.60 schwarze 222: if ('\0' != **end)
223: (*end)++;
224: return(ESCAPE_NUMBERED);
1.45 kristaps 225:
226: /*
227: * Sizes get a special category of their own.
228: */
1.8 kristaps 229: case ('s'):
1.45 kristaps 230: gly = ESCAPE_IGNORE;
1.28 kristaps 231:
1.45 kristaps 232: /* See +/- counts as a sign. */
1.65 schwarze 233: if ('+' == **end || '-' == **end || ASCII_HYPH == **end)
234: (*end)++;
1.8 kristaps 235:
1.65 schwarze 236: switch (**end) {
1.22 kristaps 237: case ('('):
1.65 schwarze 238: *start = ++*end;
239: *sz = 2;
1.22 kristaps 240: break;
241: case ('['):
1.65 schwarze 242: *start = ++*end;
1.64 schwarze 243: term = ']';
1.22 kristaps 244: break;
245: case ('\''):
1.65 schwarze 246: *start = ++*end;
1.64 schwarze 247: term = '\'';
1.22 kristaps 248: break;
249: default:
1.65 schwarze 250: *sz = 1;
1.22 kristaps 251: break;
1.8 kristaps 252: }
253:
1.45 kristaps 254: break;
1.33 kristaps 255:
1.45 kristaps 256: /*
257: * Anything else is assumed to be a glyph.
1.65 schwarze 258: * In this case, pass back the character after the backslash.
1.45 kristaps 259: */
260: default:
261: gly = ESCAPE_SPECIAL;
1.65 schwarze 262: *start = --*end;
263: *sz = 1;
1.22 kristaps 264: break;
1.45 kristaps 265: }
266:
267: assert(ESCAPE_ERROR != gly);
268:
269: /*
1.64 schwarze 270: * Read up to the terminating character,
271: * paying attention to nested escapes.
1.45 kristaps 272: */
273:
274: if ('\0' != term) {
1.64 schwarze 275: while (**end != term) {
276: switch (**end) {
277: case ('\0'):
278: return(ESCAPE_ERROR);
279: case ('\\'):
280: (*end)++;
281: if (ESCAPE_ERROR ==
282: mandoc_escape(end, NULL, NULL))
283: return(ESCAPE_ERROR);
284: break;
285: default:
286: (*end)++;
287: break;
288: }
289: }
1.65 schwarze 290: *sz = (*end)++ - *start;
1.64 schwarze 291: } else {
1.65 schwarze 292: assert(*sz > 0);
293: if ((size_t)*sz > strlen(*start))
1.45 kristaps 294: return(ESCAPE_ERROR);
1.65 schwarze 295: *end += *sz;
1.45 kristaps 296: }
297:
298: /* Run post-processors. */
299:
300: switch (gly) {
301: case (ESCAPE_FONT):
1.68 schwarze 302: if (2 == *sz) {
303: if ('C' == **start) {
304: /*
305: * Treat constant-width font modes
306: * just like regular font modes.
307: */
308: (*start)++;
309: (*sz)--;
310: } else {
311: if ('B' == (*start)[0] && 'I' == (*start)[1])
312: gly = ESCAPE_FONTBI;
313: break;
314: }
1.65 schwarze 315: } else if (1 != *sz)
1.45 kristaps 316: break;
1.61 kristaps 317:
1.65 schwarze 318: switch (**start) {
1.45 kristaps 319: case ('3'):
320: /* FALLTHROUGH */
321: case ('B'):
322: gly = ESCAPE_FONTBOLD;
323: break;
324: case ('2'):
325: /* FALLTHROUGH */
326: case ('I'):
327: gly = ESCAPE_FONTITALIC;
1.22 kristaps 328: break;
1.45 kristaps 329: case ('P'):
330: gly = ESCAPE_FONTPREV;
1.22 kristaps 331: break;
1.45 kristaps 332: case ('1'):
333: /* FALLTHROUGH */
334: case ('R'):
335: gly = ESCAPE_FONTROMAN;
1.1 kristaps 336: break;
337: }
1.46 kristaps 338: break;
1.45 kristaps 339: case (ESCAPE_SPECIAL):
1.65 schwarze 340: if (1 == *sz && 'c' == **start)
1.45 kristaps 341: gly = ESCAPE_NOSPACE;
1.22 kristaps 342: break;
1.1 kristaps 343: default:
1.22 kristaps 344: break;
1.1 kristaps 345: }
346:
1.45 kristaps 347: return(gly);
1.1 kristaps 348: }
1.4 kristaps 349:
350: void *
351: mandoc_calloc(size_t num, size_t size)
352: {
353: void *ptr;
354:
355: ptr = calloc(num, size);
356: if (NULL == ptr) {
1.6 kristaps 357: perror(NULL);
1.35 kristaps 358: exit((int)MANDOCLEVEL_SYSERR);
1.4 kristaps 359: }
360:
361: return(ptr);
362: }
363:
364:
365: void *
366: mandoc_malloc(size_t size)
367: {
368: void *ptr;
369:
370: ptr = malloc(size);
371: if (NULL == ptr) {
1.6 kristaps 372: perror(NULL);
1.35 kristaps 373: exit((int)MANDOCLEVEL_SYSERR);
1.4 kristaps 374: }
375:
376: return(ptr);
377: }
378:
379:
380: void *
381: mandoc_realloc(void *ptr, size_t size)
382: {
383:
384: ptr = realloc(ptr, size);
385: if (NULL == ptr) {
1.6 kristaps 386: perror(NULL);
1.35 kristaps 387: exit((int)MANDOCLEVEL_SYSERR);
1.4 kristaps 388: }
389:
390: return(ptr);
391: }
392:
/*
 * Return a newly allocated, NUL-terminated copy of the first
 * sz bytes at ptr.  Exactly sz bytes are copied with memcpy(3),
 * so the caller must make sure that many bytes are readable.
 * Allocation goes through mandoc_malloc().
 */
char *
mandoc_strndup(const char *ptr, size_t sz)
{
	char	*p;

	p = mandoc_malloc(sz + 1);
	memcpy(p, ptr, sz);
	/* Index with the size_t itself; a cast to int would truncate. */
	p[sz] = '\0';
	return(p);
}
1.4 kristaps 403:
404: char *
405: mandoc_strdup(const char *ptr)
406: {
407: char *p;
408:
409: p = strdup(ptr);
410: if (NULL == p) {
1.6 kristaps 411: perror(NULL);
1.35 kristaps 412: exit((int)MANDOCLEVEL_SYSERR);
1.4 kristaps 413: }
414:
415: return(p);
1.36 schwarze 416: }
417:
418: /*
419: * Parse a quoted or unquoted roff-style request or macro argument.
420: * Return a pointer to the parsed argument, which is either the original
421: * pointer or advanced by one byte in case the argument is quoted.
1.71 ! schwarze 422: * NUL-terminate the argument in place.
1.36 schwarze 423: * Collapse pairs of quotes inside quoted arguments.
424: * Advance the argument pointer to the next argument,
1.71 ! schwarze 425: * or to the NUL byte terminating the argument line.
1.36 schwarze 426: */
427: char *
1.48 kristaps 428: mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)
1.36 schwarze 429: {
430: char *start, *cp;
431: int quoted, pairs, white;
432:
433: /* Quoting can only start with a new word. */
434: start = *cpp;
1.47 kristaps 435: quoted = 0;
1.36 schwarze 436: if ('"' == *start) {
437: quoted = 1;
438: start++;
1.47 kristaps 439: }
1.36 schwarze 440:
441: pairs = 0;
442: white = 0;
443: for (cp = start; '\0' != *cp; cp++) {
1.67 schwarze 444:
445: /*
446: * Move the following text left
447: * after quoted quotes and after "\\" and "\t".
448: */
1.36 schwarze 449: if (pairs)
450: cp[-pairs] = cp[0];
1.67 schwarze 451:
1.36 schwarze 452: if ('\\' == cp[0]) {
1.67 schwarze 453: /*
454: * In copy mode, translate double to single
455: * backslashes and backslash-t to literal tabs.
456: */
457: switch (cp[1]) {
458: case ('t'):
459: cp[0] = '\t';
460: /* FALLTHROUGH */
461: case ('\\'):
1.36 schwarze 462: pairs++;
463: cp++;
1.67 schwarze 464: break;
465: case (' '):
1.36 schwarze 466: /* Skip escaped blanks. */
1.67 schwarze 467: if (0 == quoted)
468: cp++;
469: break;
470: default:
471: break;
472: }
1.36 schwarze 473: } else if (0 == quoted) {
474: if (' ' == cp[0]) {
475: /* Unescaped blanks end unquoted args. */
476: white = 1;
477: break;
478: }
479: } else if ('"' == cp[0]) {
480: if ('"' == cp[1]) {
481: /* Quoted quotes collapse. */
482: pairs++;
483: cp++;
484: } else {
485: /* Unquoted quotes end quoted args. */
486: quoted = 2;
487: break;
488: }
489: }
490: }
491:
492: /* Quoted argument without a closing quote. */
1.48 kristaps 493: if (1 == quoted)
1.42 kristaps 494: mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL);
1.36 schwarze 495:
1.71 ! schwarze 496: /* NUL-terminate this argument and move to the next one. */
1.36 schwarze 497: if (pairs)
498: cp[-pairs] = '\0';
499: if ('\0' != *cp) {
500: *cp++ = '\0';
501: while (' ' == *cp)
502: cp++;
503: }
1.39 kristaps 504: *pos += (int)(cp - start) + (quoted ? 1 : 0);
1.36 schwarze 505: *cpp = cp;
506:
1.48 kristaps 507: if ('\0' == *cp && (white || ' ' == cp[-1]))
1.42 kristaps 508: mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL);
1.36 schwarze 509:
510: return(start);
1.4 kristaps 511: }
1.7 kristaps 512:
/*
 * Try to parse the string p according to the strptime(3) format fmt.
 * On success, which requires that the whole string was consumed,
 * store the result of mktime(3) in *t and return 1.
 * Otherwise return 0 and leave *t untouched.
 * Without HAVE_STRPTIME, no parsing is attempted and the
 * function always returns 0.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 parsed;
	char		*rest;

	memset(&parsed, 0, sizeof(struct tm));

#ifdef HAVE_STRPTIME
	rest = strptime(p, fmt, &parsed);
#else
	rest = NULL;
#endif

	/* Reject parse failures and trailing garbage alike. */
	if (NULL == rest || '\0' != *rest)
		return(0);

	*t = mktime(&parsed);
	return(1);
}
532:
/*
 * Format the time t as "Month day, year" in local time.
 * Returns a buffer obtained from mandoc_malloc() that the caller
 * must free, or NULL if the time cannot be converted or formatted.
 */
static char *
time2a(time_t t)
{
	struct tm	*tm;
	char		*buf, *p;
	size_t		 ssz;
	int		 isz;

	/* localtime(3) may fail; do not dereference a NULL result. */
	tm = localtime(&t);
	if (NULL == tm)
		return(NULL);

	/*
	 * Reserve space:
	 * up to 9 characters for the month (September) + blank
	 * up to 2 characters for the day + comma + blank
	 * 4 characters for the year and a terminating '\0'
	 */
	p = buf = mandoc_malloc(10 + 4 + 4 + 1);

	if (0 == (ssz = strftime(p, 10 + 1, "%B ", tm)))
		goto fail;
	p += ssz;

	/*
	 * snprintf(3) returns the length it wanted to write;
	 * treat truncation like an error so that p never
	 * advances beyond the space reserved for the day.
	 */
	isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday);
	if (isz < 0 || isz > 4)
		goto fail;
	p += isz;

	if (0 == strftime(p, 4 + 1, "%Y", tm))
		goto fail;
	return(buf);

fail:
	free(buf);
	return(NULL);
}
567:
568: char *
1.42 kristaps 569: mandoc_normdate(struct mparse *parse, char *in, int ln, int pos)
1.7 kristaps 570: {
1.37 schwarze 571: char *out;
1.7 kristaps 572: time_t t;
573:
1.37 schwarze 574: if (NULL == in || '\0' == *in ||
575: 0 == strcmp(in, "$" "Mdocdate$")) {
1.42 kristaps 576: mandoc_msg(MANDOCERR_NODATE, parse, ln, pos, NULL);
1.37 schwarze 577: time(&t);
578: }
1.62 schwarze 579: else if (a2time(&t, "%Y-%m-%d", in))
580: t = 0;
1.37 schwarze 581: else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) &&
1.62 schwarze 582: !a2time(&t, "%b %d, %Y", in)) {
1.42 kristaps 583: mandoc_msg(MANDOCERR_BADDATE, parse, ln, pos, NULL);
1.37 schwarze 584: t = 0;
1.7 kristaps 585: }
1.37 schwarze 586: out = t ? time2a(t) : NULL;
1.38 schwarze 587: return(out ? out : mandoc_strdup(in));
1.7 kristaps 588: }
589:
/*
 * Decide whether the sz bytes at p end a sentence.
 *
 * The buffer is scanned backwards from its last byte:
 * - Closing delimiters (double quote, single quote, ']' and ')')
 *   seen before any end-of-sentence punctuation mark the text
 *   as enclosed.
 * - '.', '!' and '?' count as end-of-sentence punctuation.
 * - The first other byte ends the scan: the result is true if
 *   punctuation was found and either the text is not enclosed
 *   or that byte is alphanumeric.
 * If the scan reaches the start of the buffer, the result is true
 * if punctuation was found and the text is not enclosed.
 *
 * A non-zero enclosed argument marks the text as enclosed from
 * the start.  Returns 1 for end of sentence, 0 otherwise.
 */
int
mandoc_eos(const char *p, size_t sz, int enclosed)
{
	size_t		 i;
	int		 found;

	if (0 == sz)
		return(0);

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 *
	 * Walk backwards by index rather than by pointer, such that
	 * no pointer before the start of the buffer is ever formed.
	 */

	found = 0;
	for (i = sz; i > 0; i--) {
		switch (p[i - 1]) {
		case ('\"'):
			/* FALLTHROUGH */
		case ('\''):
			/* FALLTHROUGH */
		case (']'):
			/* FALLTHROUGH */
		case (')'):
			if (0 == found)
				enclosed = 1;
			break;
		case ('.'):
			/* FALLTHROUGH */
		case ('!'):
			/* FALLTHROUGH */
		case ('?'):
			found = 1;
			break;
		default:
			return(found && (!enclosed ||
			    isalnum((unsigned char)p[i - 1])));
		}
	}

	return(found && !enclosed);
}
1.50 kristaps 632:
/*
 * Convert the first sz bytes of p to an int using strtol(3)
 * with the given base.
 *
 * Returns -1 if sz exceeds 31 bytes, if the input is empty,
 * or if it is not completely consumed by strtol(3).
 * Values outside the range of int are clamped to INT_MIN or
 * INT_MAX rather than rejected.  Negative numbers are returned
 * as they are, so a result of -1 is ambiguous unless the caller
 * only passes unsigned digit strings.
 */
int
mandoc_strntoi(const char *p, size_t sz, int base)
{
	char		 buf[32];
	char		*ep;
	long		 v;

	/* Leave room for the terminating NUL byte. */
	if (sz >= sizeof(buf))
		return(-1);

	memcpy(buf, p, sz);
	buf[sz] = '\0';

	/* Range errors are not fatal here; the value saturates below. */
	errno = 0;
	v = strtol(buf, &ep, base);

	if (buf[0] == '\0' || *ep != '\0')
		return(-1);

	if (v > INT_MAX)
		v = INT_MAX;
	if (v < INT_MIN)
		v = INT_MIN;

	return((int)v);
}
CVSweb