Annotation of mandoc/mandoc.c, Revision 1.36
1.36 ! schwarze 1: /* $Id: mandoc.c,v 1.35 2010/09/04 20:18:53 kristaps Exp $ */
1.1 kristaps 2: /*
1.22 kristaps 3: * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
1.36 ! schwarze 4: * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
1.1 kristaps 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
1.36 ! schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.1 kristaps 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.36 ! schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.1 kristaps 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
1.9 kristaps 18: #ifdef HAVE_CONFIG_H
19: #include "config.h"
1.7 kristaps 20: #endif
21:
1.2 kristaps 22: #include <sys/types.h>
23:
1.1 kristaps 24: #include <assert.h>
25: #include <ctype.h>
26: #include <stdlib.h>
1.4 kristaps 27: #include <stdio.h>
28: #include <string.h>
1.7 kristaps 29: #include <time.h>
1.1 kristaps 30:
1.18 kristaps 31: #include "mandoc.h"
1.1 kristaps 32: #include "libmandoc.h"
33:
1.18 kristaps 34: static int a2time(time_t *, const char *, const char *);
1.7 kristaps 35:
36:
1.1 kristaps 37: int
1.18 kristaps 38: mandoc_special(char *p)
1.1 kristaps 39: {
1.22 kristaps 40: int len, i;
41: char term;
1.18 kristaps 42: char *sv;
1.1 kristaps 43:
1.22 kristaps 44: len = 0;
45: term = '\0';
1.18 kristaps 46: sv = p;
47:
1.22 kristaps 48: assert('\\' == *p);
49: p++;
1.1 kristaps 50:
1.22 kristaps 51: switch (*p++) {
1.24 kristaps 52: #if 0
53: case ('Z'):
54: /* FALLTHROUGH */
55: case ('X'):
56: /* FALLTHROUGH */
57: case ('x'):
58: /* FALLTHROUGH */
59: case ('S'):
60: /* FALLTHROUGH */
61: case ('R'):
62: /* FALLTHROUGH */
63: case ('N'):
64: /* FALLTHROUGH */
65: case ('l'):
66: /* FALLTHROUGH */
67: case ('L'):
68: /* FALLTHROUGH */
69: case ('H'):
70: /* FALLTHROUGH */
71: case ('h'):
72: /* FALLTHROUGH */
73: case ('D'):
74: /* FALLTHROUGH */
75: case ('C'):
76: /* FALLTHROUGH */
77: case ('b'):
78: /* FALLTHROUGH */
79: case ('B'):
80: /* FALLTHROUGH */
81: case ('a'):
82: /* FALLTHROUGH */
83: case ('A'):
84: if (*p++ != '\'')
85: return(0);
86: term = '\'';
87: break;
88: #endif
1.28 kristaps 89: case ('h'):
90: /* FALLTHROUGH */
91: case ('v'):
92: /* FALLTHROUGH */
1.8 kristaps 93: case ('s'):
1.22 kristaps 94: if (ASCII_HYPH == *p)
95: *p = '-';
1.28 kristaps 96:
97: i = 0;
98: if ('+' == *p || '-' == *p) {
1.22 kristaps 99: p++;
1.28 kristaps 100: i = 1;
101: }
1.8 kristaps 102:
1.22 kristaps 103: switch (*p++) {
104: case ('('):
105: len = 2;
106: break;
107: case ('['):
108: term = ']';
109: break;
110: case ('\''):
111: term = '\'';
112: break;
1.26 kristaps 113: case ('0'):
1.28 kristaps 114: i = 1;
1.26 kristaps 115: /* FALLTHROUGH */
1.22 kristaps 116: default:
117: len = 1;
118: p--;
119: break;
1.8 kristaps 120: }
121:
1.22 kristaps 122: if (ASCII_HYPH == *p)
123: *p = '-';
124: if ('+' == *p || '-' == *p) {
1.28 kristaps 125: if (i)
1.22 kristaps 126: return(0);
127: p++;
128: }
129:
1.33 kristaps 130: /* Handle embedded numerical subexp or escape. */
131:
132: if ('(' == *p) {
133: while (*p && ')' != *p)
134: if ('\\' == *p++) {
135: i = mandoc_special(--p);
136: if (0 == i)
137: return(0);
138: p += i;
139: }
140:
141: if (')' == *p++)
142: break;
143:
144: return(0);
145: } else if ('\\' == *p) {
146: if (0 == (i = mandoc_special(p)))
147: return(0);
148: p += i;
149: }
150:
1.22 kristaps 151: break;
1.24 kristaps 152: #if 0
153: case ('Y'):
154: /* FALLTHROUGH */
155: case ('V'):
156: /* FALLTHROUGH */
157: case ('$'):
158: /* FALLTHROUGH */
159: case ('n'):
160: /* FALLTHROUGH */
1.32 kristaps 161: #endif
1.24 kristaps 162: case ('k'):
163: /* FALLTHROUGH */
164: case ('M'):
165: /* FALLTHROUGH */
166: case ('m'):
167: /* FALLTHROUGH */
1.11 kristaps 168: case ('f'):
169: /* FALLTHROUGH */
170: case ('F'):
171: /* FALLTHROUGH */
1.1 kristaps 172: case ('*'):
1.22 kristaps 173: switch (*p++) {
1.1 kristaps 174: case ('('):
1.22 kristaps 175: len = 2;
176: break;
1.1 kristaps 177: case ('['):
1.22 kristaps 178: term = ']';
179: break;
1.1 kristaps 180: default:
1.22 kristaps 181: len = 1;
182: p--;
1.1 kristaps 183: break;
184: }
1.22 kristaps 185: break;
1.1 kristaps 186: case ('('):
1.22 kristaps 187: len = 2;
188: break;
1.1 kristaps 189: case ('['):
1.22 kristaps 190: term = ']';
1.30 kristaps 191: break;
192: case ('z'):
193: len = 1;
194: if ('\\' == *p) {
1.33 kristaps 195: if (0 == (i = mandoc_special(p)))
196: return(0);
197: p += i;
1.30 kristaps 198: return(*p ? (int)(p - sv) : 0);
199: }
1.1 kristaps 200: break;
1.34 kristaps 201: case ('o'):
202: /* FALLTHROUGH */
1.31 kristaps 203: case ('w'):
204: if ('\'' == *p++) {
205: term = '\'';
206: break;
207: }
208: /* FALLTHROUGH */
1.1 kristaps 209: default:
1.22 kristaps 210: len = 1;
211: p--;
212: break;
1.1 kristaps 213: }
214:
1.22 kristaps 215: if (term) {
216: for ( ; *p && term != *p; p++)
217: if (ASCII_HYPH == *p)
218: *p = '-';
1.24 kristaps 219: return(*p ? (int)(p - sv) : 0);
1.22 kristaps 220: }
1.1 kristaps 221:
1.22 kristaps 222: for (i = 0; *p && i < len; i++, p++)
223: if (ASCII_HYPH == *p)
224: *p = '-';
1.24 kristaps 225: return(i == len ? (int)(p - sv) : 0);
1.1 kristaps 226: }
227:
1.4 kristaps 228:
229: void *
230: mandoc_calloc(size_t num, size_t size)
231: {
232: void *ptr;
233:
234: ptr = calloc(num, size);
235: if (NULL == ptr) {
1.6 kristaps 236: perror(NULL);
1.35 kristaps 237: exit((int)MANDOCLEVEL_SYSERR);
1.4 kristaps 238: }
239:
240: return(ptr);
241: }
242:
243:
244: void *
245: mandoc_malloc(size_t size)
246: {
247: void *ptr;
248:
249: ptr = malloc(size);
250: if (NULL == ptr) {
1.6 kristaps 251: perror(NULL);
1.35 kristaps 252: exit((int)MANDOCLEVEL_SYSERR);
1.4 kristaps 253: }
254:
255: return(ptr);
256: }
257:
258:
259: void *
260: mandoc_realloc(void *ptr, size_t size)
261: {
262:
263: ptr = realloc(ptr, size);
264: if (NULL == ptr) {
1.6 kristaps 265: perror(NULL);
1.35 kristaps 266: exit((int)MANDOCLEVEL_SYSERR);
1.4 kristaps 267: }
268:
269: return(ptr);
270: }
271:
272:
273: char *
274: mandoc_strdup(const char *ptr)
275: {
276: char *p;
277:
278: p = strdup(ptr);
279: if (NULL == p) {
1.6 kristaps 280: perror(NULL);
1.35 kristaps 281: exit((int)MANDOCLEVEL_SYSERR);
1.4 kristaps 282: }
283:
284: return(p);
1.36 ! schwarze 285: }
! 286:
! 287: /*
! 288: * Parse a quoted or unquoted roff-style request or macro argument.
! 289: * Return a pointer to the parsed argument, which is either the original
! 290: * pointer or advanced by one byte in case the argument is quoted.
! 291: * Null-terminate the argument in place.
! 292: * Collapse pairs of quotes inside quoted arguments.
! 293: * Advance the argument pointer to the next argument,
! 294: * or to the null byte terminating the argument line.
! 295: */
! 296: char *
! 297: mandoc_getarg(char **cpp, mandocmsg msg, void *data, int ln, int *pos)
! 298: {
! 299: char *start, *cp;
! 300: int quoted, pairs, white;
! 301:
! 302: /* Quoting can only start with a new word. */
! 303: start = *cpp;
! 304: if ('"' == *start) {
! 305: quoted = 1;
! 306: start++;
! 307: } else
! 308: quoted = 0;
! 309:
! 310: pairs = 0;
! 311: white = 0;
! 312: for (cp = start; '\0' != *cp; cp++) {
! 313: /* Move left after quoted quotes and escaped backslashes. */
! 314: if (pairs)
! 315: cp[-pairs] = cp[0];
! 316: if ('\\' == cp[0]) {
! 317: if ('\\' == cp[1]) {
! 318: /* Poor man's copy mode. */
! 319: pairs++;
! 320: cp++;
! 321: } else if (0 == quoted && ' ' == cp[1])
! 322: /* Skip escaped blanks. */
! 323: cp++;
! 324: } else if (0 == quoted) {
! 325: if (' ' == cp[0]) {
! 326: /* Unescaped blanks end unquoted args. */
! 327: white = 1;
! 328: break;
! 329: }
! 330: } else if ('"' == cp[0]) {
! 331: if ('"' == cp[1]) {
! 332: /* Quoted quotes collapse. */
! 333: pairs++;
! 334: cp++;
! 335: } else {
! 336: /* Unquoted quotes end quoted args. */
! 337: quoted = 2;
! 338: break;
! 339: }
! 340: }
! 341: }
! 342:
! 343: /* Quoted argument without a closing quote. */
! 344: if (1 == quoted && msg)
! 345: (*msg)(MANDOCERR_BADQUOTE, data, ln, *pos, NULL);
! 346:
! 347: /* Null-terminate this argument and move to the next one. */
! 348: if (pairs)
! 349: cp[-pairs] = '\0';
! 350: if ('\0' != *cp) {
! 351: *cp++ = '\0';
! 352: while (' ' == *cp)
! 353: cp++;
! 354: }
! 355: *pos += (cp - start) + (quoted ? 1 : 0);
! 356: *cpp = cp;
! 357:
! 358: if ('\0' == *cp && msg && (white || ' ' == cp[-1]))
! 359: (*msg)(MANDOCERR_EOLNSPACE, data, ln, *pos, NULL);
! 360:
! 361: return(start);
1.4 kristaps 362: }
1.7 kristaps 363:
364:
/*
 * Parse the whole of `p' according to the strptime(3) format `fmt'.
 * On success, store the mktime(3) conversion of the parsed fields
 * (all other fields zeroed) in `*t' and return 1.  Return 0, leaving
 * `*t' untouched, if parsing fails or leaves trailing input behind.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 fields;
	const char	*rest;

	memset(&fields, 0, sizeof(fields));

	rest = strptime(p, fmt, &fields);
	if (rest == NULL || *rest != '\0')
		return 0;

	*t = mktime(&fields);
	return 1;
}
381:
382:
383: /*
384: * Convert from a manual date string (see mdoc(7) and man(7)) into a
385: * date according to the stipulated date type.
386: */
387: time_t
388: mandoc_a2time(int flags, const char *p)
389: {
390: time_t t;
391:
392: if (MTIME_MDOCDATE & flags) {
393: if (0 == strcmp(p, "$" "Mdocdate$"))
394: return(time(NULL));
395: if (a2time(&t, "$" "Mdocdate: %b %d %Y $", p))
396: return(t);
397: }
398:
399: if (MTIME_CANONICAL & flags || MTIME_REDUCED & flags)
400: if (a2time(&t, "%b %d, %Y", p))
401: return(t);
402:
403: if (MTIME_ISO_8601 & flags)
404: if (a2time(&t, "%Y-%m-%d", p))
405: return(t);
406:
407: if (MTIME_REDUCED & flags) {
408: if (a2time(&t, "%d, %Y", p))
409: return(t);
410: if (a2time(&t, "%Y", p))
411: return(t);
412: }
413:
414: return(0);
415: }
416:
1.12 kristaps 417:
/*
 * Decide whether the `sz' bytes at `p' end a sentence.
 *
 * The buffer is scanned backwards from its last byte:
 *  - closing delimiters (", ', ], and )) seen before any sentence
 *    punctuation mark the punctuation as enclosed;
 *  - '.', '!' and '?' count as end-of-sentence punctuation;
 *  - the first other byte ends the scan: the result is true if
 *    punctuation was found and either it is not enclosed or that
 *    byte is alphanumeric.
 * If the buffer consists only of the bytes listed above, the result is
 * true if punctuation was found and it is not enclosed.
 *
 * `enclosed' supplies the initial enclosed state.
 * Returns 1 for end-of-sentence, 0 otherwise (always 0 if sz is 0).
 */
int
mandoc_eos(const char *p, size_t sz, int enclosed)
{
	size_t	 i;
	int	 found;

	if (0 == sz)
		return(0);

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 */

	found = 0;

	/*
	 * Walk backwards by index rather than by pointer so that no
	 * pointer before the start of the buffer is ever formed and
	 * the size is never narrowed to int.
	 */
	for (i = sz; i-- > 0; ) {
		switch (p[i]) {
		case ('\"'):
			/* FALLTHROUGH */
		case ('\''):
			/* FALLTHROUGH */
		case (']'):
			/* FALLTHROUGH */
		case (')'):
			if (0 == found)
				enclosed = 1;
			break;
		case ('.'):
			/* FALLTHROUGH */
		case ('!'):
			/* FALLTHROUGH */
		case ('?'):
			found = 1;
			break;
		default:
			return(found && (!enclosed ||
			    isalnum((unsigned char)p[i])));
		}
	}

	return(found && !enclosed);
}
460:
461:
/*
 * Decide whether a line may be broken at the hyphen `c' points to,
 * where `start' is the beginning of the buffer containing it.
 * A break is allowed (1) only if the hyphen stands free inside a
 * word; otherwise 0 is returned.
 */
int
mandoc_hyph(const char *start, const char *c)
{
	char	 before, after;

	/* Never at the first byte of the buffer... */
	if (c == start)
		return 0;

	/* ...nor at its last byte. */
	after = c[1];
	if (after == '\0')
		return 0;

	before = c[-1];

	/* Not at the edge of a word, and not next to another hyphen. */
	switch (after) {
	case '\t':
	case ' ':
	case '-':
		return 0;
	default:
		break;
	}

	/* Same on the left, where a backslash marks an escape. */
	switch (before) {
	case '\t':
	case ' ':
	case '-':
	case '\\':
		return 0;
	default:
		break;
	}

	return 1;
}
CVSweb