Annotation of mandoc/mandoc.c, Revision 1.19
1.19 ! kristaps 1: /* $Id: mandoc.c,v 1.18 2010/06/09 19:22:56 kristaps Exp $ */
1.1 kristaps 2: /*
1.19 ! kristaps 3: * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@bsd.lv>
1.1 kristaps 4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
1.9 kristaps 17: #ifdef HAVE_CONFIG_H
18: #include "config.h"
1.7 kristaps 19: #endif
20:
1.2 kristaps 21: #include <sys/types.h>
22:
1.1 kristaps 23: #include <assert.h>
24: #include <ctype.h>
25: #include <stdlib.h>
1.4 kristaps 26: #include <stdio.h>
27: #include <string.h>
1.7 kristaps 28: #include <time.h>
1.1 kristaps 29:
1.18 kristaps 30: #include "mandoc.h"
1.1 kristaps 31: #include "libmandoc.h"
32:
1.18 kristaps 33: static int a2time(time_t *, const char *, const char *);
34: static int spec_norm(char *, int);
35:
36:
37: /*
38: * "Normalise" a special string by converting its ASCII_HYPH entries
39: * into actual hyphens.
40: */
41: static int
42: spec_norm(char *p, int sz)
43: {
44: int i;
45:
46: for (i = 0; i < sz; i++)
47: if (ASCII_HYPH == p[i])
48: p[i] = '-';
49:
50: return(sz);
51: }
1.7 kristaps 52:
53:
1.1 kristaps 54: int
1.18 kristaps 55: mandoc_special(char *p)
1.1 kristaps 56: {
1.8 kristaps 57: int terminator; /* Terminator for \s. */
58: int lim; /* Limit for N in \s. */
59: int c, i;
1.18 kristaps 60: char *sv;
1.1 kristaps 61:
1.18 kristaps 62: sv = p;
63:
1.1 kristaps 64: if ('\\' != *p++)
1.18 kristaps 65: return(spec_norm(sv, 0));
1.1 kristaps 66:
67: switch (*p) {
68: case ('\''):
69: /* FALLTHROUGH */
70: case ('`'):
71: /* FALLTHROUGH */
72: case ('q'):
73: /* FALLTHROUGH */
1.18 kristaps 74: case (ASCII_HYPH):
75: /* FALLTHROUGH */
1.1 kristaps 76: case ('-'):
77: /* FALLTHROUGH */
78: case ('~'):
79: /* FALLTHROUGH */
80: case ('^'):
81: /* FALLTHROUGH */
82: case ('%'):
83: /* FALLTHROUGH */
84: case ('0'):
85: /* FALLTHROUGH */
86: case (' '):
1.17 kristaps 87: /* FALLTHROUGH */
88: case ('}'):
1.1 kristaps 89: /* FALLTHROUGH */
90: case ('|'):
91: /* FALLTHROUGH */
92: case ('&'):
93: /* FALLTHROUGH */
94: case ('.'):
95: /* FALLTHROUGH */
96: case (':'):
97: /* FALLTHROUGH */
1.3 kristaps 98: case ('c'):
1.18 kristaps 99: /* FALLTHROUGH */
1.1 kristaps 100: case ('e'):
1.18 kristaps 101: return(spec_norm(sv, 2));
1.8 kristaps 102: case ('s'):
103: if ('\0' == *++p)
1.18 kristaps 104: return(spec_norm(sv, 2));
1.8 kristaps 105:
106: c = 2;
107: terminator = 0;
108: lim = 1;
109:
110: if (*p == '\'') {
111: lim = 0;
112: terminator = 1;
113: ++p;
114: ++c;
115: } else if (*p == '[') {
116: lim = 0;
117: terminator = 2;
118: ++p;
119: ++c;
120: } else if (*p == '(') {
121: lim = 2;
122: terminator = 3;
123: ++p;
124: ++c;
125: }
126:
127: if (*p == '+' || *p == '-') {
128: ++p;
129: ++c;
130: }
131:
132: if (*p == '\'') {
133: if (terminator)
1.18 kristaps 134: return(spec_norm(sv, 0));
1.8 kristaps 135: lim = 0;
136: terminator = 1;
137: ++p;
138: ++c;
139: } else if (*p == '[') {
140: if (terminator)
1.18 kristaps 141: return(spec_norm(sv, 0));
1.8 kristaps 142: lim = 0;
143: terminator = 2;
144: ++p;
145: ++c;
146: } else if (*p == '(') {
147: if (terminator)
1.18 kristaps 148: return(spec_norm(sv, 0));
1.8 kristaps 149: lim = 2;
150: terminator = 3;
151: ++p;
152: ++c;
153: }
154:
155: /* TODO: needs to handle floating point. */
156:
157: if ( ! isdigit((u_char)*p))
1.18 kristaps 158: return(spec_norm(sv, 0));
1.8 kristaps 159:
160: for (i = 0; isdigit((u_char)*p); i++) {
161: if (lim && i >= lim)
162: break;
163: ++p;
164: ++c;
165: }
166:
167: if (terminator && terminator < 3) {
168: if (1 == terminator && *p != '\'')
1.18 kristaps 169: return(spec_norm(sv, 0));
1.8 kristaps 170: if (2 == terminator && *p != ']')
1.18 kristaps 171: return(spec_norm(sv, 0));
1.8 kristaps 172: ++p;
173: ++c;
174: }
175:
1.18 kristaps 176: return(spec_norm(sv, c));
1.11 kristaps 177: case ('f'):
178: /* FALLTHROUGH */
179: case ('F'):
180: /* FALLTHROUGH */
1.1 kristaps 181: case ('*'):
1.18 kristaps 182: if ('\0' == *++p || isspace((u_char)*p))
183: return(spec_norm(sv, 0));
1.1 kristaps 184: switch (*p) {
185: case ('('):
1.18 kristaps 186: if ('\0' == *++p || isspace((u_char)*p))
187: return(spec_norm(sv, 0));
188: return(spec_norm(sv, 4));
1.1 kristaps 189: case ('['):
190: for (c = 3, p++; *p && ']' != *p; p++, c++)
1.18 kristaps 191: if (isspace((u_char)*p))
1.1 kristaps 192: break;
1.18 kristaps 193: return(spec_norm(sv, *p == ']' ? c : 0));
1.1 kristaps 194: default:
195: break;
196: }
1.18 kristaps 197: return(spec_norm(sv, 3));
1.1 kristaps 198: case ('('):
1.18 kristaps 199: if ('\0' == *++p || isspace((u_char)*p))
200: return(spec_norm(sv, 0));
201: if ('\0' == *++p || isspace((u_char)*p))
202: return(spec_norm(sv, 0));
203: return(spec_norm(sv, 4));
1.1 kristaps 204: case ('['):
205: break;
206: default:
1.18 kristaps 207: return(spec_norm(sv, 0));
1.1 kristaps 208: }
209:
210: for (c = 3, p++; *p && ']' != *p; p++, c++)
1.18 kristaps 211: if (isspace((u_char)*p))
1.1 kristaps 212: break;
213:
1.18 kristaps 214: return(spec_norm(sv, *p == ']' ? c : 0));
1.1 kristaps 215: }
216:
1.4 kristaps 217:
/*
 * calloc(3) wrapper: on success the allocation is returned, otherwise
 * the error is reported with perror(3) and the process exits with
 * EXIT_FAILURE.
 */
void *
mandoc_calloc(size_t num, size_t size)
{
	void	*mem;

	if ((mem = calloc(num, size)) != NULL)
		return(mem);

	perror(NULL);
	exit(EXIT_FAILURE);
}
231:
232:
/*
 * malloc(3) wrapper: on success the allocation is returned, otherwise
 * the error is reported with perror(3) and the process exits with
 * EXIT_FAILURE.
 */
void *
mandoc_malloc(size_t size)
{
	void	*mem;

	if ((mem = malloc(size)) != NULL)
		return(mem);

	perror(NULL);
	exit(EXIT_FAILURE);
}
246:
247:
/*
 * realloc(3) wrapper: on success the resized allocation is returned,
 * otherwise the error is reported with perror(3) and the process
 * exits with EXIT_FAILURE.
 */
void *
mandoc_realloc(void *ptr, size_t size)
{
	void	*resized;

	resized = realloc(ptr, size);
	if (resized != NULL)
		return(resized);

	perror(NULL);
	exit(EXIT_FAILURE);
}
260:
261:
/*
 * strdup(3) wrapper: on success the freshly allocated copy of ptr is
 * returned, otherwise the error is reported with perror(3) and the
 * process exits with EXIT_FAILURE.
 */
char *
mandoc_strdup(const char *ptr)
{
	char	*copy;

	if ((copy = strdup(ptr)) != NULL)
		return(copy);

	perror(NULL);
	exit(EXIT_FAILURE);
}
1.7 kristaps 275:
276:
/*
 * Parse p with the strptime(3) format fmt.  When the whole of p is
 * consumed by the format, the result of mktime(3) on the parsed fields
 * is stored in *t and 1 is returned.  Otherwise 0 is returned and *t
 * is left untouched.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 parsed;
	const char	*rest;

	/* Fields the format does not set start out as zero. */
	memset(&parsed, 0, sizeof(parsed));

	rest = strptime(p, fmt, &parsed);
	if (rest == NULL || *rest != '\0')
		return(0);

	*t = mktime(&parsed);
	return(1);
}
293:
294:
295: /*
296: * Convert from a manual date string (see mdoc(7) and man(7)) into a
297: * date according to the stipulated date type.
298: */
299: time_t
300: mandoc_a2time(int flags, const char *p)
301: {
302: time_t t;
303:
304: if (MTIME_MDOCDATE & flags) {
305: if (0 == strcmp(p, "$" "Mdocdate$"))
306: return(time(NULL));
307: if (a2time(&t, "$" "Mdocdate: %b %d %Y $", p))
308: return(t);
309: }
310:
311: if (MTIME_CANONICAL & flags || MTIME_REDUCED & flags)
312: if (a2time(&t, "%b %d, %Y", p))
313: return(t);
314:
315: if (MTIME_ISO_8601 & flags)
316: if (a2time(&t, "%Y-%m-%d", p))
317: return(t);
318:
319: if (MTIME_REDUCED & flags) {
320: if (a2time(&t, "%d, %Y", p))
321: return(t);
322: if (a2time(&t, "%Y", p))
323: return(t);
324: }
325:
326: return(0);
327: }
328:
1.12 kristaps 329:
/*
 * Decide whether the sz bytes at p end a sentence.  Returns 1 if so
 * and 0 otherwise; an empty buffer never ends a sentence.
 */
int
mandoc_eos(const char *p, size_t sz)
{
	size_t	 pos;
	char	 ch;

	/*
	 * Scan backwards from the last byte.  Closing quotes, brackets
	 * and parentheses are transparent: they allow end-of-sentence
	 * punctuation in front of them to propagate outward.
	 */
	for (pos = sz; pos > 0; pos--) {
		ch = p[pos - 1];

		if (ch == '\"' || ch == '\'' || ch == ']' || ch == ')')
			continue;

		if (ch == '!' || ch == '?')
			return(1);

		if (ch == '.') {
			/* An escaped period does not end a sentence. */
			if (pos > 1 && p[pos - 2] == '\\')
				return(0);
			return(1);
		}

		return(0);
	}

	return(0);
}
369:
370:
/*
 * Decide whether a line may be broken at the hyphen character c, which
 * lies inside the string beginning at start.  A break is only allowed
 * when the character stands free within a word.  Returns 1 if the
 * break is allowed and 0 otherwise.
 */
int
mandoc_hyph(const char *start, const char *c)
{
	char	 before, after;

	/* Never at the very first character of the buffer... */
	if (c == start)
		return(0);

	/* ...nor at the very last one. */
	after = c[1];
	if (after == '\0')
		return(0);

	before = c[-1];

	/* Not at a word boundary and not next to another hyphen. */
	switch (after) {
	case ('\t'):
		/* FALLTHROUGH */
	case (' '):
		/* FALLTHROUGH */
	case ('-'):
		return(0);
	default:
		break;
	}

	/* The same applies on the left, where an escape also forbids it. */
	switch (before) {
	case ('\t'):
		/* FALLTHROUGH */
	case (' '):
		/* FALLTHROUGH */
	case ('-'):
		/* FALLTHROUGH */
	case ('\\'):
		return(0);
	default:
		break;
	}

	return(1);
}
CVSweb