Annotation of mandoc/mandoc.c, Revision 1.68
1.68 ! schwarze 1: /* $Id: mandoc.c,v 1.67 2013/06/20 22:39:30 schwarze Exp $ */
1.1 kristaps 2: /*
1.59 schwarze 3: * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.68 ! schwarze 4: * Copyright (c) 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
1.1 kristaps 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
1.36 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.1 kristaps 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.36 schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.1 kristaps 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
1.9 kristaps 18: #ifdef HAVE_CONFIG_H
19: #include "config.h"
1.7 kristaps 20: #endif
21:
1.2 kristaps 22: #include <sys/types.h>
23:
1.1 kristaps 24: #include <assert.h>
25: #include <ctype.h>
1.50 kristaps 26: #include <errno.h>
27: #include <limits.h>
1.1 kristaps 28: #include <stdlib.h>
1.4 kristaps 29: #include <stdio.h>
30: #include <string.h>
1.7 kristaps 31: #include <time.h>
1.1 kristaps 32:
1.18 kristaps 33: #include "mandoc.h"
1.1 kristaps 34: #include "libmandoc.h"
35:
/* Presumably a date buffer size (judging by the name); not referenced in the code shown here. */
1.37 schwarze 36: #define DATESIZE 32
37:
/* Forward declarations of the file-local date helpers defined further down. */
1.18 kristaps 38: static int a2time(time_t *, const char *, const char *);
1.37 schwarze 39: static char *time2a(time_t);
1.7 kristaps 40:
1.45 kristaps 41:
42: enum mandoc_esc
43: mandoc_escape(const char **end, const char **start, int *sz)
1.1 kristaps 44: {
1.65 schwarze 45: const char *local_start;
46: int local_sz;
47: char term;
1.45 kristaps 48: enum mandoc_esc gly;
49:
1.65 schwarze 50: /*
51: * When the caller doesn't provide return storage,
52: * use local storage.
53: */
54:
55: if (NULL == start)
56: start = &local_start;
57: if (NULL == sz)
58: sz = &local_sz;
59:
60: /*
61: * Beyond the backslash, at least one input character
62: * is part of the escape sequence. With one exception
63: * (see below), that character won't be returned.
64: */
65:
1.45 kristaps 66: gly = ESCAPE_ERROR;
1.65 schwarze 67: *start = ++*end;
68: *sz = 0;
1.64 schwarze 69: term = '\0';
1.18 kristaps 70:
1.65 schwarze 71: switch ((*start)[-1]) {
1.45 kristaps 72: /*
73: * First the glyphs. There are several different forms of
74: * these, but each eventually returns a substring of the glyph
75: * name.
76: */
77: case ('('):
78: gly = ESCAPE_SPECIAL;
1.65 schwarze 79: *sz = 2;
1.45 kristaps 80: break;
81: case ('['):
82: gly = ESCAPE_SPECIAL;
1.52 kristaps 83: /*
84: * Unicode escapes are defined in groff as \[uXXXX] to
85: * \[u10FFFF], where the contained value must be a valid
86: * Unicode codepoint. Here, however, only check whether
87: * it's not a zero-width escape.
88: */
1.65 schwarze 89: if ('u' == (*start)[0] && ']' != (*start)[1])
1.52 kristaps 90: gly = ESCAPE_UNICODE;
1.45 kristaps 91: term = ']';
92: break;
93: case ('C'):
1.65 schwarze 94: if ('\'' != **start)
1.45 kristaps 95: return(ESCAPE_ERROR);
96: gly = ESCAPE_SPECIAL;
1.65 schwarze 97: *start = ++*end;
1.45 kristaps 98: term = '\'';
99: break;
1.63 schwarze 100:
101: /*
102: * The \z escape is supposed to output the following
103: * character without advancing the cursor position.
104: * Since we are mostly dealing with terminal mode,
105: * let us just skip the next character.
106: */
107: case ('z'):
108: return(ESCAPE_SKIPCHAR);
1.1 kristaps 109:
1.45 kristaps 110: /*
111: * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
112: * 'X' is the trigger. These have opaque sub-strings.
113: */
114: case ('F'):
115: /* FALLTHROUGH */
116: case ('g'):
1.24 kristaps 117: /* FALLTHROUGH */
1.45 kristaps 118: case ('k'):
1.24 kristaps 119: /* FALLTHROUGH */
1.45 kristaps 120: case ('M'):
1.24 kristaps 121: /* FALLTHROUGH */
1.45 kristaps 122: case ('m'):
1.24 kristaps 123: /* FALLTHROUGH */
1.45 kristaps 124: case ('n'):
1.24 kristaps 125: /* FALLTHROUGH */
1.45 kristaps 126: case ('V'):
1.24 kristaps 127: /* FALLTHROUGH */
1.45 kristaps 128: case ('Y'):
1.60 schwarze 129: gly = ESCAPE_IGNORE;
1.24 kristaps 130: /* FALLTHROUGH */
1.45 kristaps 131: case ('f'):
132: if (ESCAPE_ERROR == gly)
133: gly = ESCAPE_FONT;
1.65 schwarze 134: switch (**start) {
1.45 kristaps 135: case ('('):
1.65 schwarze 136: *start = ++*end;
137: *sz = 2;
1.45 kristaps 138: break;
139: case ('['):
1.65 schwarze 140: *start = ++*end;
1.45 kristaps 141: term = ']';
142: break;
143: default:
1.65 schwarze 144: *sz = 1;
1.45 kristaps 145: break;
146: }
147: break;
148:
149: /*
150: * These escapes are of the form \X'Y', where 'X' is the trigger
151: * and 'Y' is any string. These have opaque sub-strings.
152: */
153: case ('A'):
1.24 kristaps 154: /* FALLTHROUGH */
1.45 kristaps 155: case ('b'):
1.24 kristaps 156: /* FALLTHROUGH */
157: case ('D'):
158: /* FALLTHROUGH */
1.45 kristaps 159: case ('o'):
1.24 kristaps 160: /* FALLTHROUGH */
1.45 kristaps 161: case ('R'):
1.24 kristaps 162: /* FALLTHROUGH */
1.45 kristaps 163: case ('X'):
1.24 kristaps 164: /* FALLTHROUGH */
1.45 kristaps 165: case ('Z'):
1.65 schwarze 166: if ('\'' != **start)
1.45 kristaps 167: return(ESCAPE_ERROR);
168: gly = ESCAPE_IGNORE;
1.65 schwarze 169: *start = ++*end;
1.24 kristaps 170: term = '\'';
171: break;
1.45 kristaps 172:
173: /*
174: * These escapes are of the form \X'N', where 'X' is the trigger
175: * and 'N' resolves to a numerical expression.
176: */
177: case ('B'):
178: /* FALLTHROUGH */
1.28 kristaps 179: case ('h'):
180: /* FALLTHROUGH */
1.45 kristaps 181: case ('H'):
182: /* FALLTHROUGH */
183: case ('L'):
184: /* FALLTHROUGH */
185: case ('l'):
1.60 schwarze 186: gly = ESCAPE_NUMBERED;
1.45 kristaps 187: /* FALLTHROUGH */
188: case ('S'):
189: /* FALLTHROUGH */
1.28 kristaps 190: case ('v'):
191: /* FALLTHROUGH */
1.45 kristaps 192: case ('w'):
193: /* FALLTHROUGH */
194: case ('x'):
1.65 schwarze 195: if ('\'' != **start)
196: return(ESCAPE_ERROR);
1.45 kristaps 197: if (ESCAPE_ERROR == gly)
198: gly = ESCAPE_IGNORE;
1.65 schwarze 199: *start = ++*end;
1.64 schwarze 200: term = '\'';
1.45 kristaps 201: break;
1.60 schwarze 202:
203: /*
204: * Special handling for the numbered character escape.
205: * XXX Do any other escapes need similar handling?
206: */
207: case ('N'):
1.65 schwarze 208: if ('\0' == **start)
1.60 schwarze 209: return(ESCAPE_ERROR);
1.65 schwarze 210: (*end)++;
211: if (isdigit((unsigned char)**start)) {
212: *sz = 1;
1.60 schwarze 213: return(ESCAPE_IGNORE);
1.65 schwarze 214: }
215: (*start)++;
1.60 schwarze 216: while (isdigit((unsigned char)**end))
217: (*end)++;
1.65 schwarze 218: *sz = *end - *start;
1.60 schwarze 219: if ('\0' != **end)
220: (*end)++;
221: return(ESCAPE_NUMBERED);
1.45 kristaps 222:
223: /*
224: * Sizes get a special category of their own.
225: */
1.8 kristaps 226: case ('s'):
1.45 kristaps 227: gly = ESCAPE_IGNORE;
1.28 kristaps 228:
1.45 kristaps 229: /* See +/- counts as a sign. */
1.65 schwarze 230: if ('+' == **end || '-' == **end || ASCII_HYPH == **end)
231: (*end)++;
1.8 kristaps 232:
1.65 schwarze 233: switch (**end) {
1.22 kristaps 234: case ('('):
1.65 schwarze 235: *start = ++*end;
236: *sz = 2;
1.22 kristaps 237: break;
238: case ('['):
1.65 schwarze 239: *start = ++*end;
1.64 schwarze 240: term = ']';
1.22 kristaps 241: break;
242: case ('\''):
1.65 schwarze 243: *start = ++*end;
1.64 schwarze 244: term = '\'';
1.22 kristaps 245: break;
246: default:
1.65 schwarze 247: *sz = 1;
1.22 kristaps 248: break;
1.8 kristaps 249: }
250:
1.45 kristaps 251: break;
1.33 kristaps 252:
1.45 kristaps 253: /*
254: * Anything else is assumed to be a glyph.
1.65 schwarze 255: * In this case, pass back the character after the backslash.
1.45 kristaps 256: */
257: default:
258: gly = ESCAPE_SPECIAL;
1.65 schwarze 259: *start = --*end;
260: *sz = 1;
1.22 kristaps 261: break;
1.45 kristaps 262: }
263:
264: assert(ESCAPE_ERROR != gly);
265:
266: /*
1.64 schwarze 267: * Read up to the terminating character,
268: * paying attention to nested escapes.
1.45 kristaps 269: */
270:
271: if ('\0' != term) {
1.64 schwarze 272: while (**end != term) {
273: switch (**end) {
274: case ('\0'):
275: return(ESCAPE_ERROR);
276: case ('\\'):
277: (*end)++;
278: if (ESCAPE_ERROR ==
279: mandoc_escape(end, NULL, NULL))
280: return(ESCAPE_ERROR);
281: break;
282: default:
283: (*end)++;
284: break;
285: }
286: }
1.65 schwarze 287: *sz = (*end)++ - *start;
1.64 schwarze 288: } else {
1.65 schwarze 289: assert(*sz > 0);
290: if ((size_t)*sz > strlen(*start))
1.45 kristaps 291: return(ESCAPE_ERROR);
1.65 schwarze 292: *end += *sz;
1.45 kristaps 293: }
294:
295: /* Run post-processors. */
296:
297: switch (gly) {
298: case (ESCAPE_FONT):
1.68 ! schwarze 299: if (2 == *sz) {
! 300: if ('C' == **start) {
! 301: /*
! 302: * Treat constant-width font modes
! 303: * just like regular font modes.
! 304: */
! 305: (*start)++;
! 306: (*sz)--;
! 307: } else {
! 308: if ('B' == (*start)[0] && 'I' == (*start)[1])
! 309: gly = ESCAPE_FONTBI;
! 310: break;
! 311: }
1.65 schwarze 312: } else if (1 != *sz)
1.45 kristaps 313: break;
1.61 kristaps 314:
1.65 schwarze 315: switch (**start) {
1.45 kristaps 316: case ('3'):
317: /* FALLTHROUGH */
318: case ('B'):
319: gly = ESCAPE_FONTBOLD;
320: break;
321: case ('2'):
322: /* FALLTHROUGH */
323: case ('I'):
324: gly = ESCAPE_FONTITALIC;
1.22 kristaps 325: break;
1.45 kristaps 326: case ('P'):
327: gly = ESCAPE_FONTPREV;
1.22 kristaps 328: break;
1.45 kristaps 329: case ('1'):
330: /* FALLTHROUGH */
331: case ('R'):
332: gly = ESCAPE_FONTROMAN;
1.1 kristaps 333: break;
334: }
1.46 kristaps 335: break;
1.45 kristaps 336: case (ESCAPE_SPECIAL):
1.65 schwarze 337: if (1 == *sz && 'c' == **start)
1.45 kristaps 338: gly = ESCAPE_NOSPACE;
1.22 kristaps 339: break;
1.1 kristaps 340: default:
1.22 kristaps 341: break;
1.1 kristaps 342: }
343:
1.45 kristaps 344: return(gly);
1.1 kristaps 345: }
1.4 kristaps 346:
347: void *
348: mandoc_calloc(size_t num, size_t size)
349: {
350: void *ptr;
351:
352: ptr = calloc(num, size);
353: if (NULL == ptr) {
1.6 kristaps 354: perror(NULL);
1.35 kristaps 355: exit((int)MANDOCLEVEL_SYSERR);
1.4 kristaps 356: }
357:
358: return(ptr);
359: }
360:
361:
362: void *
363: mandoc_malloc(size_t size)
364: {
365: void *ptr;
366:
367: ptr = malloc(size);
368: if (NULL == ptr) {
1.6 kristaps 369: perror(NULL);
1.35 kristaps 370: exit((int)MANDOCLEVEL_SYSERR);
1.4 kristaps 371: }
372:
373: return(ptr);
374: }
375:
376:
377: void *
378: mandoc_realloc(void *ptr, size_t size)
379: {
380:
381: ptr = realloc(ptr, size);
382: if (NULL == ptr) {
1.6 kristaps 383: perror(NULL);
1.35 kristaps 384: exit((int)MANDOCLEVEL_SYSERR);
1.4 kristaps 385: }
386:
387: return(ptr);
388: }
389:
/*
 * Return a newly allocated, null-terminated copy of the first sz
 * bytes at ptr.  Exactly sz bytes are copied, so the caller must
 * make sure that many bytes are readable.  The memory comes from
 * mandoc_malloc().
 */
char *
mandoc_strndup(const char *ptr, size_t sz)
{
	char	*p;

	p = mandoc_malloc(sz + 1);
	memcpy(p, ptr, sz);
	/* Index with the size_t itself; narrowing to int could go negative. */
	p[sz] = '\0';
	return(p);
}
1.4 kristaps 400:
401: char *
402: mandoc_strdup(const char *ptr)
403: {
404: char *p;
405:
406: p = strdup(ptr);
407: if (NULL == p) {
1.6 kristaps 408: perror(NULL);
1.35 kristaps 409: exit((int)MANDOCLEVEL_SYSERR);
1.4 kristaps 410: }
411:
412: return(p);
1.36 schwarze 413: }
414:
415: /*
416: * Parse a quoted or unquoted roff-style request or macro argument.
417: * Return a pointer to the parsed argument, which is either the original
418: * pointer or advanced by one byte in case the argument is quoted.
419: * Null-terminate the argument in place.
420: * Collapse pairs of quotes inside quoted arguments.
421: * Advance the argument pointer to the next argument,
422: * or to the null byte terminating the argument line.
423: */
424: char *
1.48 kristaps 425: mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)
1.36 schwarze 426: {
427: char *start, *cp;
428: int quoted, pairs, white;
429:
430: /* Quoting can only start with a new word. */
431: start = *cpp;
1.47 kristaps 432: quoted = 0;
1.36 schwarze 433: if ('"' == *start) {
434: quoted = 1;
435: start++;
1.47 kristaps 436: }
1.36 schwarze 437:
438: pairs = 0;
439: white = 0;
440: for (cp = start; '\0' != *cp; cp++) {
1.67 schwarze 441:
442: /*
443: * Move the following text left
444: * after quoted quotes and after "\\" and "\t".
445: */
1.36 schwarze 446: if (pairs)
447: cp[-pairs] = cp[0];
1.67 schwarze 448:
1.36 schwarze 449: if ('\\' == cp[0]) {
1.67 schwarze 450: /*
451: * In copy mode, translate double to single
452: * backslashes and backslash-t to literal tabs.
453: */
454: switch (cp[1]) {
455: case ('t'):
456: cp[0] = '\t';
457: /* FALLTHROUGH */
458: case ('\\'):
1.36 schwarze 459: pairs++;
460: cp++;
1.67 schwarze 461: break;
462: case (' '):
1.36 schwarze 463: /* Skip escaped blanks. */
1.67 schwarze 464: if (0 == quoted)
465: cp++;
466: break;
467: default:
468: break;
469: }
1.36 schwarze 470: } else if (0 == quoted) {
471: if (' ' == cp[0]) {
472: /* Unescaped blanks end unquoted args. */
473: white = 1;
474: break;
475: }
476: } else if ('"' == cp[0]) {
477: if ('"' == cp[1]) {
478: /* Quoted quotes collapse. */
479: pairs++;
480: cp++;
481: } else {
482: /* Unquoted quotes end quoted args. */
483: quoted = 2;
484: break;
485: }
486: }
487: }
488:
489: /* Quoted argument without a closing quote. */
1.48 kristaps 490: if (1 == quoted)
1.42 kristaps 491: mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL);
1.36 schwarze 492:
493: /* Null-terminate this argument and move to the next one. */
494: if (pairs)
495: cp[-pairs] = '\0';
496: if ('\0' != *cp) {
497: *cp++ = '\0';
498: while (' ' == *cp)
499: cp++;
500: }
1.39 kristaps 501: *pos += (int)(cp - start) + (quoted ? 1 : 0);
1.36 schwarze 502: *cpp = cp;
503:
1.48 kristaps 504: if ('\0' == *cp && (white || ' ' == cp[-1]))
1.42 kristaps 505: mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL);
1.36 schwarze 506:
507: return(start);
1.4 kristaps 508: }
1.7 kristaps 509:
/*
 * Parse the string p according to the strptime(3) format fmt.
 * If the whole string matches, store the result of mktime(3)
 * in *t and return 1.  Otherwise, leave *t alone and return 0.
 * Without HAVE_STRPTIME, nothing can be parsed, so always return 0.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 parsed;
	char		*rest;

	memset(&parsed, 0, sizeof(struct tm));

#ifdef HAVE_STRPTIME
	rest = strptime(p, fmt, &parsed);
#else
	rest = NULL;
#endif

	/* Reject both parse failures and trailing garbage. */
	if (NULL == rest || '\0' != *rest)
		return(0);

	*t = mktime(&parsed);
	return(1);
}
529:
/*
 * Format the time t in the local time zone as "Month day, year".
 * Return a newly allocated string that the caller must free(3),
 * or NULL if the time cannot be converted or formatted.
 */
static char *
time2a(time_t t)
{
	struct tm	*tm;
	char		*buf, *p;
	size_t		 ssz;
	int		 isz;

	/* localtime(3) returns NULL if t cannot be represented. */
	tm = localtime(&t);
	if (NULL == tm)
		return(NULL);

	/*
	 * Reserve space:
	 * up to 9 characters for the month (September) + blank
	 * up to 2 characters for the day + comma + blank
	 * 4 characters for the year and a terminating '\0'
	 */
	p = buf = mandoc_malloc(10 + 4 + 4 + 1);

	if (0 == (ssz = strftime(p, 10 + 1, "%B ", tm)))
		goto fail;
	p += ssz;

	/*
	 * snprintf(3) returns the length it wanted to write;
	 * anything beyond 4 bytes means the output was truncated
	 * and advancing by it would leave the buffer.
	 */
	isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday);
	if (isz < 0 || isz > 4)
		goto fail;
	p += isz;

	if (0 == strftime(p, 4 + 1, "%Y", tm))
		goto fail;
	return(buf);

fail:
	free(buf);
	return(NULL);
}
564:
565: char *
1.42 kristaps 566: mandoc_normdate(struct mparse *parse, char *in, int ln, int pos)
1.7 kristaps 567: {
1.37 schwarze 568: char *out;
1.7 kristaps 569: time_t t;
570:
1.37 schwarze 571: if (NULL == in || '\0' == *in ||
572: 0 == strcmp(in, "$" "Mdocdate$")) {
1.42 kristaps 573: mandoc_msg(MANDOCERR_NODATE, parse, ln, pos, NULL);
1.37 schwarze 574: time(&t);
575: }
1.62 schwarze 576: else if (a2time(&t, "%Y-%m-%d", in))
577: t = 0;
1.37 schwarze 578: else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) &&
1.62 schwarze 579: !a2time(&t, "%b %d, %Y", in)) {
1.42 kristaps 580: mandoc_msg(MANDOCERR_BADDATE, parse, ln, pos, NULL);
1.37 schwarze 581: t = 0;
1.7 kristaps 582: }
1.37 schwarze 583: out = t ? time2a(t) : NULL;
1.38 schwarze 584: return(out ? out : mandoc_strdup(in));
1.7 kristaps 585: }
586:
/*
 * Decide whether the sz bytes at p end a sentence.
 *
 * The buffer is scanned backwards from its last byte.
 * Closing delimiters (double quote, single quote, ']' and ')')
 * seen before any sentence punctuation mark the punctuation as
 * enclosed.  The first byte that is neither a closing delimiter
 * nor one of '.', '!' and '?' settles the matter: the text ends
 * a sentence if punctuation was found and it is either not
 * enclosed or directly preceded by an alphanumeric character.
 * If the buffer holds nothing else, punctuation must have been
 * found and must not be enclosed.
 *
 * A non-zero enclosed argument treats the text as already enclosed.
 * Returns 1 at the end of a sentence and 0 otherwise.
 */
int
mandoc_eos(const char *p, size_t sz, int enclosed)
{
	size_t	 i;
	int	 found;

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 */

	found = 0;

	/*
	 * Count the index down rather than a pointer, such that no
	 * pointer to before the start of the buffer is ever formed.
	 * An empty buffer skips the loop and yields 0 below.
	 */
	for (i = sz; i > 0; i--) {
		switch (p[i - 1]) {
		case ('\"'):
			/* FALLTHROUGH */
		case ('\''):
			/* FALLTHROUGH */
		case (']'):
			/* FALLTHROUGH */
		case (')'):
			if (0 == found)
				enclosed = 1;
			break;
		case ('.'):
			/* FALLTHROUGH */
		case ('!'):
			/* FALLTHROUGH */
		case ('?'):
			found = 1;
			break;
		default:
			return(found && (!enclosed ||
			    isalnum((unsigned char)p[i - 1])));
		}
	}

	return(found && !enclosed);
}
1.50 kristaps 629:
/*
 * Convert the first sz bytes of p to an int in the given base,
 * using strtol(3).
 * Return -1 if sz exceeds 31 bytes, if the string is empty,
 * or if it is not a number in its entirety.
 * Values outside the range of an int are clamped to INT_MIN
 * or INT_MAX.  Note that valid negative input is passed through,
 * so a return value of -1 is ambiguous.
 */
int
mandoc_strntoi(const char *p, size_t sz, int base)
{
	char	 digits[32];
	char	*stop;
	long	 val;

	/* Leave room for the terminating null byte. */
	if (sz >= sizeof(digits))
		return(-1);

	memcpy(digits, p, sz);
	digits[sz] = '\0';

	errno = 0;
	val = strtol(digits, &stop, base);

	if ('\0' == digits[0] || '\0' != *stop)
		return(-1);

	if (val > INT_MAX)
		return(INT_MAX);
	if (val < INT_MIN)
		return(INT_MIN);

	return((int)val);
}
CVSweb