Annotation of mandoc/libmandoc.c, Revision 1.1
1.1 ! kristaps 1: /* $Id: mandoc.c,v 1.19 2010/06/19 20:46:28 kristaps Exp $ */
! 2: /*
! 3: * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@bsd.lv>
! 4: *
! 5: * Permission to use, copy, modify, and distribute this software for any
! 6: * purpose with or without fee is hereby granted, provided that the above
! 7: * copyright notice and this permission notice appear in all copies.
! 8: *
! 9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
! 10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
! 11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
! 12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
! 13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
! 14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
! 15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
! 16: */
! 17: #ifdef HAVE_CONFIG_H
! 18: #include "config.h"
! 19: #endif
! 20:
! 21: #include <sys/types.h>
! 22:
! 23: #include <assert.h>
! 24: #include <ctype.h>
! 25: #include <stdlib.h>
! 26: #include <stdio.h>
! 27: #include <string.h>
! 28: #include <time.h>
! 29:
! 30: #include "mandoc.h"
! 31: #include "libmandoc.h"
! 32:
! 33: static int a2time(time_t *, const char *, const char *);
! 34: static int spec_norm(char *, int);
! 35:
! 36:
! 37: /*
! 38: * "Normalise" a special string by converting its ASCII_HYPH entries
! 39: * into actual hyphens.
! 40: */
! 41: static int
! 42: spec_norm(char *p, int sz)
! 43: {
! 44: int i;
! 45:
! 46: for (i = 0; i < sz; i++)
! 47: if (ASCII_HYPH == p[i])
! 48: p[i] = '-';
! 49:
! 50: return(sz);
! 51: }
! 52:
! 53:
! 54: int
! 55: mandoc_special(char *p)
! 56: {
! 57: int terminator; /* Terminator for \s. */
! 58: int lim; /* Limit for N in \s. */
! 59: int c, i;
! 60: char *sv;
! 61:
! 62: sv = p;
! 63:
! 64: if ('\\' != *p++)
! 65: return(spec_norm(sv, 0));
! 66:
! 67: switch (*p) {
! 68: case ('\''):
! 69: /* FALLTHROUGH */
! 70: case ('`'):
! 71: /* FALLTHROUGH */
! 72: case ('q'):
! 73: /* FALLTHROUGH */
! 74: case (ASCII_HYPH):
! 75: /* FALLTHROUGH */
! 76: case ('-'):
! 77: /* FALLTHROUGH */
! 78: case ('~'):
! 79: /* FALLTHROUGH */
! 80: case ('^'):
! 81: /* FALLTHROUGH */
! 82: case ('%'):
! 83: /* FALLTHROUGH */
! 84: case ('0'):
! 85: /* FALLTHROUGH */
! 86: case (' '):
! 87: /* FALLTHROUGH */
! 88: case ('}'):
! 89: /* FALLTHROUGH */
! 90: case ('|'):
! 91: /* FALLTHROUGH */
! 92: case ('&'):
! 93: /* FALLTHROUGH */
! 94: case ('.'):
! 95: /* FALLTHROUGH */
! 96: case (':'):
! 97: /* FALLTHROUGH */
! 98: case ('c'):
! 99: /* FALLTHROUGH */
! 100: case ('e'):
! 101: return(spec_norm(sv, 2));
! 102: case ('s'):
! 103: if ('\0' == *++p)
! 104: return(spec_norm(sv, 2));
! 105:
! 106: c = 2;
! 107: terminator = 0;
! 108: lim = 1;
! 109:
! 110: if (*p == '\'') {
! 111: lim = 0;
! 112: terminator = 1;
! 113: ++p;
! 114: ++c;
! 115: } else if (*p == '[') {
! 116: lim = 0;
! 117: terminator = 2;
! 118: ++p;
! 119: ++c;
! 120: } else if (*p == '(') {
! 121: lim = 2;
! 122: terminator = 3;
! 123: ++p;
! 124: ++c;
! 125: }
! 126:
! 127: if (*p == '+' || *p == '-') {
! 128: ++p;
! 129: ++c;
! 130: }
! 131:
! 132: if (*p == '\'') {
! 133: if (terminator)
! 134: return(spec_norm(sv, 0));
! 135: lim = 0;
! 136: terminator = 1;
! 137: ++p;
! 138: ++c;
! 139: } else if (*p == '[') {
! 140: if (terminator)
! 141: return(spec_norm(sv, 0));
! 142: lim = 0;
! 143: terminator = 2;
! 144: ++p;
! 145: ++c;
! 146: } else if (*p == '(') {
! 147: if (terminator)
! 148: return(spec_norm(sv, 0));
! 149: lim = 2;
! 150: terminator = 3;
! 151: ++p;
! 152: ++c;
! 153: }
! 154:
! 155: /* TODO: needs to handle floating point. */
! 156:
! 157: if ( ! isdigit((u_char)*p))
! 158: return(spec_norm(sv, 0));
! 159:
! 160: for (i = 0; isdigit((u_char)*p); i++) {
! 161: if (lim && i >= lim)
! 162: break;
! 163: ++p;
! 164: ++c;
! 165: }
! 166:
! 167: if (terminator && terminator < 3) {
! 168: if (1 == terminator && *p != '\'')
! 169: return(spec_norm(sv, 0));
! 170: if (2 == terminator && *p != ']')
! 171: return(spec_norm(sv, 0));
! 172: ++p;
! 173: ++c;
! 174: }
! 175:
! 176: return(spec_norm(sv, c));
! 177: case ('f'):
! 178: /* FALLTHROUGH */
! 179: case ('F'):
! 180: /* FALLTHROUGH */
! 181: case ('*'):
! 182: if ('\0' == *++p || isspace((u_char)*p))
! 183: return(spec_norm(sv, 0));
! 184: switch (*p) {
! 185: case ('('):
! 186: if ('\0' == *++p || isspace((u_char)*p))
! 187: return(spec_norm(sv, 0));
! 188: return(spec_norm(sv, 4));
! 189: case ('['):
! 190: for (c = 3, p++; *p && ']' != *p; p++, c++)
! 191: if (isspace((u_char)*p))
! 192: break;
! 193: return(spec_norm(sv, *p == ']' ? c : 0));
! 194: default:
! 195: break;
! 196: }
! 197: return(spec_norm(sv, 3));
! 198: case ('('):
! 199: if ('\0' == *++p || isspace((u_char)*p))
! 200: return(spec_norm(sv, 0));
! 201: if ('\0' == *++p || isspace((u_char)*p))
! 202: return(spec_norm(sv, 0));
! 203: return(spec_norm(sv, 4));
! 204: case ('['):
! 205: break;
! 206: default:
! 207: return(spec_norm(sv, 0));
! 208: }
! 209:
! 210: for (c = 3, p++; *p && ']' != *p; p++, c++)
! 211: if (isspace((u_char)*p))
! 212: break;
! 213:
! 214: return(spec_norm(sv, *p == ']' ? c : 0));
! 215: }
! 216:
! 217:
/*
 * calloc(3) wrapper: on success return the zeroed allocation; if
 * calloc() yields NULL, print a diagnostic via perror(3) and exit
 * with EXIT_FAILURE instead of returning.
 */
void *
mandoc_calloc(size_t num, size_t size)
{
	void	*mem;

	if (NULL != (mem = calloc(num, size)))
		return(mem);

	perror(NULL);
	exit(EXIT_FAILURE);
	/* NOTREACHED */
}
! 231:
! 232:
/*
 * malloc(3) wrapper: on success return the allocation; if malloc()
 * yields NULL, print a diagnostic via perror(3) and exit with
 * EXIT_FAILURE instead of returning.
 */
void *
mandoc_malloc(size_t size)
{
	void	*mem;

	if (NULL != (mem = malloc(size)))
		return(mem);

	perror(NULL);
	exit(EXIT_FAILURE);
	/* NOTREACHED */
}
! 246:
! 247:
/*
 * realloc(3) wrapper: resize `ptr' to `size' bytes and return the new
 * pointer; if realloc() yields NULL, print a diagnostic via perror(3)
 * and exit with EXIT_FAILURE instead of returning.
 */
void *
mandoc_realloc(void *ptr, size_t size)
{
	void	*grown;

	grown = realloc(ptr, size);
	if (NULL != grown)
		return(grown);

	perror(NULL);
	exit(EXIT_FAILURE);
	/* NOTREACHED */
}
! 260:
! 261:
/*
 * strdup(3) wrapper: return a freshly allocated copy of `ptr'; if
 * strdup() yields NULL, print a diagnostic via perror(3) and exit
 * with EXIT_FAILURE instead of returning.
 */
char *
mandoc_strdup(const char *ptr)
{
	char	*copy;

	if (NULL != (copy = strdup(ptr)))
		return(copy);

	perror(NULL);
	exit(EXIT_FAILURE);
	/* NOTREACHED */
}
! 275:
! 276:
/*
 * Parse `p' with strptime(3) according to `fmt'.  Only when the whole
 * of `p' is consumed is the result converted with mktime(3), stored
 * in `*t', and 1 returned.  Otherwise `*t' is left untouched and 0 is
 * returned.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 parsed;
	const char	*rest;

	/* Start from an all-zero broken-down time. */
	memset(&parsed, 0, sizeof(parsed));

	rest = strptime(p, fmt, &parsed);
	if (NULL == rest || '\0' != *rest)
		return(0);

	*t = mktime(&parsed);
	return(1);
}
! 293:
! 294:
! 295: /*
! 296: * Convert from a manual date string (see mdoc(7) and man(7)) into a
! 297: * date according to the stipulated date type.
! 298: */
! 299: time_t
! 300: mandoc_a2time(int flags, const char *p)
! 301: {
! 302: time_t t;
! 303:
! 304: if (MTIME_MDOCDATE & flags) {
! 305: if (0 == strcmp(p, "$" "Mdocdate$"))
! 306: return(time(NULL));
! 307: if (a2time(&t, "$" "Mdocdate: %b %d %Y $", p))
! 308: return(t);
! 309: }
! 310:
! 311: if (MTIME_CANONICAL & flags || MTIME_REDUCED & flags)
! 312: if (a2time(&t, "%b %d, %Y", p))
! 313: return(t);
! 314:
! 315: if (MTIME_ISO_8601 & flags)
! 316: if (a2time(&t, "%Y-%m-%d", p))
! 317: return(t);
! 318:
! 319: if (MTIME_REDUCED & flags) {
! 320: if (a2time(&t, "%d, %Y", p))
! 321: return(t);
! 322: if (a2time(&t, "%Y", p))
! 323: return(t);
! 324: }
! 325:
! 326: return(0);
! 327: }
! 328:
! 329:
/*
 * Report whether the `sz'-byte buffer `p' ends a sentence.
 *
 * Returns 1 if, after skipping any trailing closing delimiters
 * (double quote, single quote, ']' or ')'), the last byte is '!',
 * '?' or a '.' that is not preceded by a backslash.  Returns 0
 * otherwise, including for an empty buffer or one made up solely of
 * closing delimiters.
 */
int
mandoc_eos(const char *p, size_t sz)
{

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 *
	 * The buffer is walked backwards with a size_t index: no
	 * narrowing to int, and nothing is read when sz is zero.
	 */

	for ( ; sz > 0; sz--) {
		switch (p[sz - 1]) {
		case ('\"'):
			/* FALLTHROUGH */
		case ('\''):
			/* FALLTHROUGH */
		case (']'):
			/* FALLTHROUGH */
		case (')'):
			/* Transparent: keep looking further left. */
			break;
		case ('.'):
			/* Escaped periods. */
			if (sz > 1 && '\\' == p[sz - 2])
				return(0);
			/* FALLTHROUGH */
		case ('!'):
			/* FALLTHROUGH */
		case ('?'):
			return(1);
		default:
			return(0);
		}
	}

	return(0);
}
! 369:
! 370:
/*
 * Decide whether a line may be broken at the hyphen position `c'
 * inside the buffer beginning at `start'.  Only the neighbouring
 * bytes are examined, not `*c' itself.  Returns 1 if the position is
 * free-standing within a word, else 0.
 */
int
mandoc_hyph(const char *start, const char *c)
{
	char	 before, after;

	/* The first byte of the buffer has no left neighbour. */
	if (c == start)
		return(0);

	after = c[1];
	before = c[-1];

	/* Right neighbour: end of buffer, end of word, or doubled hyphen. */
	switch (after) {
	case ('\0'):
		/* FALLTHROUGH */
	case ('\t'):
		/* FALLTHROUGH */
	case (' '):
		/* FALLTHROUGH */
	case ('-'):
		return(0);
	default:
		break;
	}

	/* Left neighbour: start of word, doubled hyphen, or an escape. */
	switch (before) {
	case ('\t'):
		/* FALLTHROUGH */
	case (' '):
		/* FALLTHROUGH */
	case ('-'):
		/* FALLTHROUGH */
	case ('\\'):
		return(0);
	default:
		break;
	}

	return(1);
}
CVSweb