Annotation of mandoc/mandoc.c, Revision 1.44
1.44 ! kristaps 1: /* $Id: mandoc.c,v 1.43 2011/03/22 14:05:45 kristaps Exp $ */
1.1 kristaps 2: /*
1.22 kristaps 3: * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
1.36 schwarze 4: * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
1.1 kristaps 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
1.36 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.1 kristaps 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.36 schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.1 kristaps 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
1.9 kristaps 18: #ifdef HAVE_CONFIG_H
19: #include "config.h"
1.7 kristaps 20: #endif
21:
1.2 kristaps 22: #include <sys/types.h>
23:
1.1 kristaps 24: #include <assert.h>
25: #include <ctype.h>
26: #include <stdlib.h>
1.4 kristaps 27: #include <stdio.h>
28: #include <string.h>
1.7 kristaps 29: #include <time.h>
1.1 kristaps 30:
1.18 kristaps 31: #include "mandoc.h"
1.1 kristaps 32: #include "libmandoc.h"
33:
1.37 schwarze 34: #define DATESIZE 32
35:
1.18 kristaps 36: static int a2time(time_t *, const char *, const char *);
1.37 schwarze 37: static char *time2a(time_t);
1.7 kristaps 38:
1.1 kristaps 39: int
1.18 kristaps 40: mandoc_special(char *p)
1.1 kristaps 41: {
1.22 kristaps 42: int len, i;
43: char term;
1.18 kristaps 44: char *sv;
1.1 kristaps 45:
1.22 kristaps 46: len = 0;
47: term = '\0';
1.18 kristaps 48: sv = p;
49:
1.22 kristaps 50: assert('\\' == *p);
51: p++;
1.1 kristaps 52:
1.22 kristaps 53: switch (*p++) {
1.24 kristaps 54: #if 0
55: case ('Z'):
56: /* FALLTHROUGH */
57: case ('X'):
58: /* FALLTHROUGH */
59: case ('x'):
60: /* FALLTHROUGH */
61: case ('S'):
62: /* FALLTHROUGH */
63: case ('R'):
64: /* FALLTHROUGH */
65: case ('N'):
66: /* FALLTHROUGH */
67: case ('l'):
68: /* FALLTHROUGH */
69: case ('L'):
70: /* FALLTHROUGH */
71: case ('H'):
72: /* FALLTHROUGH */
73: case ('h'):
74: /* FALLTHROUGH */
75: case ('D'):
76: /* FALLTHROUGH */
77: case ('C'):
78: /* FALLTHROUGH */
79: case ('b'):
80: /* FALLTHROUGH */
81: case ('B'):
82: /* FALLTHROUGH */
83: case ('a'):
84: /* FALLTHROUGH */
85: case ('A'):
86: if (*p++ != '\'')
87: return(0);
88: term = '\'';
89: break;
90: #endif
1.28 kristaps 91: case ('h'):
92: /* FALLTHROUGH */
93: case ('v'):
94: /* FALLTHROUGH */
1.8 kristaps 95: case ('s'):
1.22 kristaps 96: if (ASCII_HYPH == *p)
97: *p = '-';
1.28 kristaps 98:
99: i = 0;
100: if ('+' == *p || '-' == *p) {
1.22 kristaps 101: p++;
1.28 kristaps 102: i = 1;
103: }
1.8 kristaps 104:
1.22 kristaps 105: switch (*p++) {
106: case ('('):
107: len = 2;
108: break;
109: case ('['):
110: term = ']';
111: break;
112: case ('\''):
113: term = '\'';
114: break;
1.26 kristaps 115: case ('0'):
1.28 kristaps 116: i = 1;
1.26 kristaps 117: /* FALLTHROUGH */
1.22 kristaps 118: default:
119: len = 1;
120: p--;
121: break;
1.8 kristaps 122: }
123:
1.22 kristaps 124: if (ASCII_HYPH == *p)
125: *p = '-';
126: if ('+' == *p || '-' == *p) {
1.28 kristaps 127: if (i)
1.22 kristaps 128: return(0);
129: p++;
130: }
131:
1.33 kristaps 132: /* Handle embedded numerical subexp or escape. */
133:
134: if ('(' == *p) {
135: while (*p && ')' != *p)
136: if ('\\' == *p++) {
137: i = mandoc_special(--p);
138: if (0 == i)
139: return(0);
140: p += i;
141: }
142:
143: if (')' == *p++)
144: break;
145:
146: return(0);
147: } else if ('\\' == *p) {
148: if (0 == (i = mandoc_special(p)))
149: return(0);
150: p += i;
151: }
152:
1.22 kristaps 153: break;
1.24 kristaps 154: #if 0
155: case ('Y'):
156: /* FALLTHROUGH */
157: case ('V'):
158: /* FALLTHROUGH */
159: case ('$'):
160: /* FALLTHROUGH */
161: case ('n'):
162: /* FALLTHROUGH */
1.32 kristaps 163: #endif
1.24 kristaps 164: case ('k'):
165: /* FALLTHROUGH */
166: case ('M'):
167: /* FALLTHROUGH */
168: case ('m'):
169: /* FALLTHROUGH */
1.11 kristaps 170: case ('f'):
171: /* FALLTHROUGH */
172: case ('F'):
173: /* FALLTHROUGH */
1.1 kristaps 174: case ('*'):
1.22 kristaps 175: switch (*p++) {
1.1 kristaps 176: case ('('):
1.22 kristaps 177: len = 2;
178: break;
1.1 kristaps 179: case ('['):
1.22 kristaps 180: term = ']';
181: break;
1.1 kristaps 182: default:
1.22 kristaps 183: len = 1;
184: p--;
1.1 kristaps 185: break;
186: }
1.22 kristaps 187: break;
1.1 kristaps 188: case ('('):
1.22 kristaps 189: len = 2;
190: break;
1.1 kristaps 191: case ('['):
1.22 kristaps 192: term = ']';
1.30 kristaps 193: break;
194: case ('z'):
195: len = 1;
196: if ('\\' == *p) {
1.33 kristaps 197: if (0 == (i = mandoc_special(p)))
198: return(0);
199: p += i;
1.30 kristaps 200: return(*p ? (int)(p - sv) : 0);
201: }
1.1 kristaps 202: break;
1.34 kristaps 203: case ('o'):
204: /* FALLTHROUGH */
1.31 kristaps 205: case ('w'):
206: if ('\'' == *p++) {
207: term = '\'';
208: break;
209: }
210: /* FALLTHROUGH */
1.1 kristaps 211: default:
1.22 kristaps 212: len = 1;
213: p--;
214: break;
1.1 kristaps 215: }
216:
1.22 kristaps 217: if (term) {
218: for ( ; *p && term != *p; p++)
219: if (ASCII_HYPH == *p)
220: *p = '-';
1.24 kristaps 221: return(*p ? (int)(p - sv) : 0);
1.22 kristaps 222: }
1.1 kristaps 223:
1.22 kristaps 224: for (i = 0; *p && i < len; i++, p++)
225: if (ASCII_HYPH == *p)
226: *p = '-';
1.24 kristaps 227: return(i == len ? (int)(p - sv) : 0);
1.1 kristaps 228: }
229:
1.4 kristaps 230:
231: void *
232: mandoc_calloc(size_t num, size_t size)
233: {
234: void *ptr;
235:
236: ptr = calloc(num, size);
237: if (NULL == ptr) {
1.6 kristaps 238: perror(NULL);
1.35 kristaps 239: exit((int)MANDOCLEVEL_SYSERR);
1.4 kristaps 240: }
241:
242: return(ptr);
243: }
244:
245:
246: void *
247: mandoc_malloc(size_t size)
248: {
249: void *ptr;
250:
251: ptr = malloc(size);
252: if (NULL == ptr) {
1.6 kristaps 253: perror(NULL);
1.35 kristaps 254: exit((int)MANDOCLEVEL_SYSERR);
1.4 kristaps 255: }
256:
257: return(ptr);
258: }
259:
260:
261: void *
262: mandoc_realloc(void *ptr, size_t size)
263: {
264:
265: ptr = realloc(ptr, size);
266: if (NULL == ptr) {
1.6 kristaps 267: perror(NULL);
1.35 kristaps 268: exit((int)MANDOCLEVEL_SYSERR);
1.4 kristaps 269: }
270:
271: return(ptr);
272: }
273:
274:
275: char *
276: mandoc_strdup(const char *ptr)
277: {
278: char *p;
279:
280: p = strdup(ptr);
281: if (NULL == p) {
1.6 kristaps 282: perror(NULL);
1.35 kristaps 283: exit((int)MANDOCLEVEL_SYSERR);
1.4 kristaps 284: }
285:
286: return(p);
1.36 schwarze 287: }
288:
289: /*
290: * Parse a quoted or unquoted roff-style request or macro argument.
291: * Return a pointer to the parsed argument, which is either the original
292: * pointer or advanced by one byte in case the argument is quoted.
293: * Null-terminate the argument in place.
294: * Collapse pairs of quotes inside quoted arguments.
295: * Advance the argument pointer to the next argument,
296: * or to the null byte terminating the argument line.
297: */
298: char *
1.42 kristaps 299: mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)
1.36 schwarze 300: {
301: char *start, *cp;
302: int quoted, pairs, white;
303:
304: /* Quoting can only start with a new word. */
305: start = *cpp;
306: if ('"' == *start) {
307: quoted = 1;
308: start++;
309: } else
310: quoted = 0;
311:
312: pairs = 0;
313: white = 0;
314: for (cp = start; '\0' != *cp; cp++) {
315: /* Move left after quoted quotes and escaped backslashes. */
316: if (pairs)
317: cp[-pairs] = cp[0];
318: if ('\\' == cp[0]) {
319: if ('\\' == cp[1]) {
320: /* Poor man's copy mode. */
321: pairs++;
322: cp++;
323: } else if (0 == quoted && ' ' == cp[1])
324: /* Skip escaped blanks. */
325: cp++;
326: } else if (0 == quoted) {
327: if (' ' == cp[0]) {
328: /* Unescaped blanks end unquoted args. */
329: white = 1;
330: break;
331: }
332: } else if ('"' == cp[0]) {
333: if ('"' == cp[1]) {
334: /* Quoted quotes collapse. */
335: pairs++;
336: cp++;
337: } else {
338: /* Unquoted quotes end quoted args. */
339: quoted = 2;
340: break;
341: }
342: }
343: }
344:
345: /* Quoted argument without a closing quote. */
1.42 kristaps 346: if (1 == quoted)
347: mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL);
1.36 schwarze 348:
349: /* Null-terminate this argument and move to the next one. */
350: if (pairs)
351: cp[-pairs] = '\0';
352: if ('\0' != *cp) {
353: *cp++ = '\0';
354: while (' ' == *cp)
355: cp++;
356: }
1.39 kristaps 357: *pos += (int)(cp - start) + (quoted ? 1 : 0);
1.36 schwarze 358: *cpp = cp;
359:
1.42 kristaps 360: if ('\0' == *cp && (white || ' ' == cp[-1]))
361: mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL);
1.36 schwarze 362:
363: return(start);
1.4 kristaps 364: }
1.7 kristaps 365:
/*
 * Convert the string `p' to a time_t according to the strptime(3)
 * format `fmt'.  The whole string must be consumed by the format.
 *
 * Returns 1 and stores the result in *t on success.  Returns 0 and
 * leaves *t untouched if the string does not match the format, has
 * trailing bytes, or mktime(3) cannot represent the resulting time.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 tm;
	char		*pp;
	time_t		 res;

	/* Fields not set by the format must start out as zero. */
	memset(&tm, 0, sizeof(struct tm));

	pp = strptime(p, fmt, &tm);
	if (NULL == pp || '\0' != *pp)
		return(0);

	/* mktime(3) signals failure with (time_t)-1. */
	res = mktime(&tm);
	if ((time_t)-1 == res)
		return(0);

	*t = res;
	return(1);
}
382:
/*
 * Format the time `t' (in local time) as "Month day, year", for
 * example "March 22, 2011".
 *
 * Returns a string allocated with mandoc_malloc() that the caller
 * must free, or NULL if the time cannot be broken down or any part
 * does not fit into the space reserved for it.
 */
static char *
time2a(time_t t)
{
	struct tm	 tm;
	char		*buf, *p;
	size_t		 ssz;
	int		 isz;

	/* On failure `tm' is not filled in and must not be read. */
	if (NULL == localtime_r(&t, &tm))
		return(NULL);

	/*
	 * Reserve space:
	 * up to 9 characters for the month (September) + blank
	 * up to 2 characters for the day + comma + blank
	 * 4 characters for the year and a terminating '\0'
	 */
	p = buf = mandoc_malloc(10 + 4 + 4 + 1);

	if (0 == (ssz = strftime(p, 10 + 1, "%B ", &tm)))
		goto fail;
	p += (int)ssz;

	/*
	 * snprintf(3) returns the length it wanted to write; a value
	 * that does not fit the field means truncation, and advancing
	 * by it would step past the space reserved for the day.
	 */
	isz = snprintf(p, 4 + 1, "%d, ", tm.tm_mday);
	if (isz < 0 || isz >= 4 + 1)
		goto fail;
	p += isz;

	if (0 == strftime(p, 4 + 1, "%Y", &tm))
		goto fail;
	return(buf);

fail:
	free(buf);
	return(NULL);
}
417:
418: char *
1.42 kristaps 419: mandoc_normdate(struct mparse *parse, char *in, int ln, int pos)
1.7 kristaps 420: {
1.37 schwarze 421: char *out;
1.7 kristaps 422: time_t t;
423:
1.37 schwarze 424: if (NULL == in || '\0' == *in ||
425: 0 == strcmp(in, "$" "Mdocdate$")) {
1.42 kristaps 426: mandoc_msg(MANDOCERR_NODATE, parse, ln, pos, NULL);
1.37 schwarze 427: time(&t);
428: }
429: else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) &&
430: !a2time(&t, "%b %d, %Y", in) &&
431: !a2time(&t, "%Y-%m-%d", in)) {
1.42 kristaps 432: mandoc_msg(MANDOCERR_BADDATE, parse, ln, pos, NULL);
1.37 schwarze 433: t = 0;
1.7 kristaps 434: }
1.37 schwarze 435: out = t ? time2a(t) : NULL;
1.38 schwarze 436: return(out ? out : mandoc_strdup(in));
1.7 kristaps 437: }
438:
/*
 * Decide whether the `sz' bytes at `p' end a sentence.
 *
 * The buffer is scanned backwards from its last byte.  Closing
 * quotes, brackets and parentheses are stepped over, so that
 * end-of-sentence punctuation in front of them can propagate
 * outward; meeting one before any punctuation marks the text as
 * enclosed.  The first other byte decides: the result is true if
 * punctuation ('.', '!' or '?') was seen and the text is either
 * not enclosed or that byte is alphanumeric.
 *
 * Returns 1 for end of sentence, 0 otherwise.
 */
int
mandoc_eos(const char *p, size_t sz, int enclosed)
{
	int	 i, found;
	char	 c;

	if (0 == sz)
		return(0);

	found = 0;
	for (i = (int)sz; i > 0; i--) {
		c = p[i - 1];

		if ('.' == c || '!' == c || '?' == c) {
			found = 1;
			continue;
		}

		if ('"' == c || '\'' == c || ']' == c || ')' == c) {
			/* Only closers behind the punctuation count. */
			if ( ! found)
				enclosed = 1;
			continue;
		}

		return(found && ( ! enclosed ||
		    isalnum((unsigned char)c)));
	}

	/* Nothing but punctuation and closing delimiters. */
	return(found && ! enclosed);
}
481:
/*
 * Decide whether a line may be broken at the hyphen `c', which
 * points into the buffer beginning at `start'.  Breaking is only
 * allowed where the hyphen stands free inside a word.
 *
 * Returns 1 if a break is allowed, 0 otherwise.
 */
int
mandoc_hyph(const char *start, const char *c)
{
	char	 before, after;

	/* Never at the first or last byte of the buffer. */
	if (start == c)
		return(0);
	after = c[1];
	if ('\0' == after)
		return(0);
	before = c[-1];

	/* Never next to a word boundary or another hyphen. */
	switch (after) {
	case '\t':
	case ' ':
	case '-':
		return(0);
	default:
		break;
	}

	/* Likewise in front, and never right after a backslash. */
	switch (before) {
	case '\t':
	case ' ':
	case '-':
	case '\\':
		return(0);
	default:
		break;
	}

	return(1);
}
508:
/*
 * Test whether the text at cp[*ppos] opens a macro line, that is,
 * whether it starts with `.', `'' or the two bytes `\.'.
 *
 * If so, move *ppos past that introducer and past any blanks and
 * tabs following it, and return 1.  If not, return 0 and leave
 * *ppos alone.
 */
int
mandoc_getcontrol(const char *cp, int *ppos)
{
	const char	*s;

	s = cp + *ppos;

	if ('\\' == s[0] && '.' == s[1])
		s += 2;
	else if ('.' == s[0] || '\'' == s[0])
		s += 1;
	else
		return(0);

	/* Blanks between the control character and the macro name. */
	while (' ' == *s || '\t' == *s)
		s++;

	*ppos = (int)(s - cp);
	return(1);
}
CVSweb