Annotation of mandoc/mandoc.c, Revision 1.35
1.35 ! kristaps 1: /* $Id: mandoc.c,v 1.34 2010/08/29 11:28:09 kristaps Exp $ */
1.1 kristaps 2: /*
1.22 kristaps 3: * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
1.1 kristaps 4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
1.9 kristaps 17: #ifdef HAVE_CONFIG_H
18: #include "config.h"
1.7 kristaps 19: #endif
20:
1.2 kristaps 21: #include <sys/types.h>
22:
1.1 kristaps 23: #include <assert.h>
24: #include <ctype.h>
25: #include <stdlib.h>
1.4 kristaps 26: #include <stdio.h>
27: #include <string.h>
1.7 kristaps 28: #include <time.h>
1.1 kristaps 29:
1.18 kristaps 30: #include "mandoc.h"
1.1 kristaps 31: #include "libmandoc.h"
32:
1.18 kristaps 33: static int a2time(time_t *, const char *, const char *);
1.7 kristaps 34:
35:
1.1 kristaps 36: int
1.18 kristaps 37: mandoc_special(char *p)
1.1 kristaps 38: {
1.22 kristaps 39: int len, i;
40: char term;
1.18 kristaps 41: char *sv;
1.1 kristaps 42:
1.22 kristaps 43: len = 0;
44: term = '\0';
1.18 kristaps 45: sv = p;
46:
1.22 kristaps 47: assert('\\' == *p);
48: p++;
1.1 kristaps 49:
1.22 kristaps 50: switch (*p++) {
1.24 kristaps 51: #if 0
52: case ('Z'):
53: /* FALLTHROUGH */
54: case ('X'):
55: /* FALLTHROUGH */
56: case ('x'):
57: /* FALLTHROUGH */
58: case ('S'):
59: /* FALLTHROUGH */
60: case ('R'):
61: /* FALLTHROUGH */
62: case ('N'):
63: /* FALLTHROUGH */
64: case ('l'):
65: /* FALLTHROUGH */
66: case ('L'):
67: /* FALLTHROUGH */
68: case ('H'):
69: /* FALLTHROUGH */
70: case ('h'):
71: /* FALLTHROUGH */
72: case ('D'):
73: /* FALLTHROUGH */
74: case ('C'):
75: /* FALLTHROUGH */
76: case ('b'):
77: /* FALLTHROUGH */
78: case ('B'):
79: /* FALLTHROUGH */
80: case ('a'):
81: /* FALLTHROUGH */
82: case ('A'):
83: if (*p++ != '\'')
84: return(0);
85: term = '\'';
86: break;
87: #endif
1.28 kristaps 88: case ('h'):
89: /* FALLTHROUGH */
90: case ('v'):
91: /* FALLTHROUGH */
1.8 kristaps 92: case ('s'):
1.22 kristaps 93: if (ASCII_HYPH == *p)
94: *p = '-';
1.28 kristaps 95:
96: i = 0;
97: if ('+' == *p || '-' == *p) {
1.22 kristaps 98: p++;
1.28 kristaps 99: i = 1;
100: }
1.8 kristaps 101:
1.22 kristaps 102: switch (*p++) {
103: case ('('):
104: len = 2;
105: break;
106: case ('['):
107: term = ']';
108: break;
109: case ('\''):
110: term = '\'';
111: break;
1.26 kristaps 112: case ('0'):
1.28 kristaps 113: i = 1;
1.26 kristaps 114: /* FALLTHROUGH */
1.22 kristaps 115: default:
116: len = 1;
117: p--;
118: break;
1.8 kristaps 119: }
120:
1.22 kristaps 121: if (ASCII_HYPH == *p)
122: *p = '-';
123: if ('+' == *p || '-' == *p) {
1.28 kristaps 124: if (i)
1.22 kristaps 125: return(0);
126: p++;
127: }
128:
1.33 kristaps 129: /* Handle embedded numerical subexp or escape. */
130:
131: if ('(' == *p) {
132: while (*p && ')' != *p)
133: if ('\\' == *p++) {
134: i = mandoc_special(--p);
135: if (0 == i)
136: return(0);
137: p += i;
138: }
139:
140: if (')' == *p++)
141: break;
142:
143: return(0);
144: } else if ('\\' == *p) {
145: if (0 == (i = mandoc_special(p)))
146: return(0);
147: p += i;
148: }
149:
1.22 kristaps 150: break;
1.24 kristaps 151: #if 0
152: case ('Y'):
153: /* FALLTHROUGH */
154: case ('V'):
155: /* FALLTHROUGH */
156: case ('$'):
157: /* FALLTHROUGH */
158: case ('n'):
159: /* FALLTHROUGH */
1.32 kristaps 160: #endif
1.24 kristaps 161: case ('k'):
162: /* FALLTHROUGH */
163: case ('M'):
164: /* FALLTHROUGH */
165: case ('m'):
166: /* FALLTHROUGH */
1.11 kristaps 167: case ('f'):
168: /* FALLTHROUGH */
169: case ('F'):
170: /* FALLTHROUGH */
1.1 kristaps 171: case ('*'):
1.22 kristaps 172: switch (*p++) {
1.1 kristaps 173: case ('('):
1.22 kristaps 174: len = 2;
175: break;
1.1 kristaps 176: case ('['):
1.22 kristaps 177: term = ']';
178: break;
1.1 kristaps 179: default:
1.22 kristaps 180: len = 1;
181: p--;
1.1 kristaps 182: break;
183: }
1.22 kristaps 184: break;
1.1 kristaps 185: case ('('):
1.22 kristaps 186: len = 2;
187: break;
1.1 kristaps 188: case ('['):
1.22 kristaps 189: term = ']';
1.30 kristaps 190: break;
191: case ('z'):
192: len = 1;
193: if ('\\' == *p) {
1.33 kristaps 194: if (0 == (i = mandoc_special(p)))
195: return(0);
196: p += i;
1.30 kristaps 197: return(*p ? (int)(p - sv) : 0);
198: }
1.1 kristaps 199: break;
1.34 kristaps 200: case ('o'):
201: /* FALLTHROUGH */
1.31 kristaps 202: case ('w'):
203: if ('\'' == *p++) {
204: term = '\'';
205: break;
206: }
207: /* FALLTHROUGH */
1.1 kristaps 208: default:
1.22 kristaps 209: len = 1;
210: p--;
211: break;
1.1 kristaps 212: }
213:
1.22 kristaps 214: if (term) {
215: for ( ; *p && term != *p; p++)
216: if (ASCII_HYPH == *p)
217: *p = '-';
1.24 kristaps 218: return(*p ? (int)(p - sv) : 0);
1.22 kristaps 219: }
1.1 kristaps 220:
1.22 kristaps 221: for (i = 0; *p && i < len; i++, p++)
222: if (ASCII_HYPH == *p)
223: *p = '-';
1.24 kristaps 224: return(i == len ? (int)(p - sv) : 0);
1.1 kristaps 225: }
226:
1.4 kristaps 227:
228: void *
229: mandoc_calloc(size_t num, size_t size)
230: {
231: void *ptr;
232:
233: ptr = calloc(num, size);
234: if (NULL == ptr) {
1.6 kristaps 235: perror(NULL);
1.35 ! kristaps 236: exit((int)MANDOCLEVEL_SYSERR);
1.4 kristaps 237: }
238:
239: return(ptr);
240: }
241:
242:
243: void *
244: mandoc_malloc(size_t size)
245: {
246: void *ptr;
247:
248: ptr = malloc(size);
249: if (NULL == ptr) {
1.6 kristaps 250: perror(NULL);
1.35 ! kristaps 251: exit((int)MANDOCLEVEL_SYSERR);
1.4 kristaps 252: }
253:
254: return(ptr);
255: }
256:
257:
258: void *
259: mandoc_realloc(void *ptr, size_t size)
260: {
261:
262: ptr = realloc(ptr, size);
263: if (NULL == ptr) {
1.6 kristaps 264: perror(NULL);
1.35 ! kristaps 265: exit((int)MANDOCLEVEL_SYSERR);
1.4 kristaps 266: }
267:
268: return(ptr);
269: }
270:
271:
272: char *
273: mandoc_strdup(const char *ptr)
274: {
275: char *p;
276:
277: p = strdup(ptr);
278: if (NULL == p) {
1.6 kristaps 279: perror(NULL);
1.35 ! kristaps 280: exit((int)MANDOCLEVEL_SYSERR);
1.4 kristaps 281: }
282:
283: return(p);
284: }
1.7 kristaps 285:
286:
/*
 * Parse the string `p' with the strptime(3) format `fmt' and, on
 * success, store the resulting calendar time in `*t'.
 *
 * The whole of `p' must be consumed by the format; trailing bytes make
 * the parse fail.  Fields not set by the format are zero.
 *
 * Returns 1 on success.  Returns 0, leaving `*t' untouched, if the
 * string does not match the format or if mktime(3) cannot represent
 * the resulting time.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 tm;
	const char	*end;
	time_t		 when;

	memset(&tm, 0, sizeof(tm));

	end = strptime(p, fmt, &tm);
	if (end == NULL || *end != '\0')
		return 0;

	/*
	 * mktime() reports an unrepresentable time as (time_t)-1; do
	 * not pass that off to the caller as a valid date.
	 */
	when = mktime(&tm);
	if (when == (time_t)-1)
		return 0;

	*t = when;
	return 1;
}
303:
304:
305: /*
306: * Convert from a manual date string (see mdoc(7) and man(7)) into a
307: * date according to the stipulated date type.
308: */
309: time_t
310: mandoc_a2time(int flags, const char *p)
311: {
312: time_t t;
313:
314: if (MTIME_MDOCDATE & flags) {
315: if (0 == strcmp(p, "$" "Mdocdate$"))
316: return(time(NULL));
317: if (a2time(&t, "$" "Mdocdate: %b %d %Y $", p))
318: return(t);
319: }
320:
321: if (MTIME_CANONICAL & flags || MTIME_REDUCED & flags)
322: if (a2time(&t, "%b %d, %Y", p))
323: return(t);
324:
325: if (MTIME_ISO_8601 & flags)
326: if (a2time(&t, "%Y-%m-%d", p))
327: return(t);
328:
329: if (MTIME_REDUCED & flags) {
330: if (a2time(&t, "%d, %Y", p))
331: return(t);
332: if (a2time(&t, "%Y", p))
333: return(t);
334: }
335:
336: return(0);
337: }
338:
1.12 kristaps 339:
/*
 * Decide whether the `sz' bytes at `p' end a sentence.
 *
 * The buffer is scanned backwards from its last byte:
 *  - closing quotes and brackets (", ', ], and the closing
 *    parenthesis) seen before any sentence punctuation mark the
 *    punctuation as enclosed;
 *  - '.', '!' and '?' count as sentence punctuation;
 *  - the first other byte ends the scan.
 *
 * `enclosed' is the caller's initial value for the "enclosed" state;
 * non-zero means the text is already known to be enclosed.
 *
 * Returns 1 if sentence punctuation was found and either it is not
 * enclosed or the byte before the trailing punctuation/closers is
 * alphanumeric; returns 0 otherwise, including for an empty buffer.
 */
int
mandoc_eos(const char *p, size_t sz, int enclosed)
{
	int	 found = 0;

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 *
	 * Walk by index rather than by pointer so that no pointer to
	 * before the start of the buffer is ever formed.
	 */

	while (sz > 0) {
		const unsigned char c = (unsigned char)p[--sz];

		switch (c) {
		case '\"':
		case '\'':
		case ']':
		case ')':
			if (!found)
				enclosed = 1;
			break;
		case '.':
		case '!':
		case '?':
			found = 1;
			break;
		default:
			return found && (!enclosed || isalnum(c));
		}
	}

	return found && !enclosed;
}
382:
383:
/*
 * Decide whether a line may be broken at the hyphen that `c' points
 * to inside the string beginning at `start'.
 *
 * A break is allowed only when the hyphen stands free inside a word.
 * Returns 0 if it is the first or last character of the buffer, if
 * either neighbour is a tab, a space or another '-', or if it is
 * preceded by a backslash; returns 1 otherwise.
 */
int
mandoc_hyph(const char *start, const char *c)
{
	char	 before, after;

	/* The first character of the buffer has no predecessor. */
	if (c == start)
		return 0;

	before = c[-1];
	after = c[1];

	/* Last character of the buffer. */
	if (after == '\0')
		return 0;

	/* First or last character of a word. */
	if (before == '\t' || after == '\t')
		return 0;
	if (before == ' ' || after == ' ')
		return 0;

	/* Part of a run of hyphens. */
	if (before == '-' || after == '-')
		return 0;

	/* Escaped hyphen. */
	if (before == '\\')
		return 0;

	return 1;
}
CVSweb