Annotation of mandoc/chars.c, Revision 1.3
1.3 ! kristaps 1: /* $Id: chars.c,v 1.2 2009/09/17 08:21:42 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <assert.h>
18: #include <err.h>
19: #include <stdlib.h>
20: #include <string.h>
21:
22: #include "chars.h"
23:
/*
 * Inclusive bounds on the first byte of a lookup key.  The hash table
 * has one bucket per value in [PRINT_LO, PRINT_HI], and a key's bucket
 * is its first byte minus PRINT_LO.
 */
#define	PRINT_LO	 32
#define	PRINT_HI	 126
1.1 kristaps 26:
/*
 * One entry of the escape table.  Field order is significant: the
 * table initialisers fill these members by position.
 */
struct ln {
	struct ln	 *next;		/* next entry in the same hash chain */
	const char	 *code;		/* key compared against the input */
	const char	 *ascii;	/* value returned by non-HTML tables */
	const char	 *html;		/* value returned by CHARS_HTML tables */
	size_t		  codesz;	/* must equal the lookup length to match */
	size_t		  asciisz;	/* size reported together with ascii */
	size_t		  htmlsz;	/* size reported together with html */
	int		  type;		/* bitmask of the CHARS_* flags below */
#define	CHARS_CHAR	 (1 << 0)	/* visible to chars_a2ascii() */
#define	CHARS_STRING	 (1 << 1)	/* visible to chars_a2res() */
#define	CHARS_BOTH	 (CHARS_CHAR | CHARS_STRING)
};
40:
1.3 ! kristaps 41: #define LINES_MAX 267
1.1 kristaps 42:
1.2 kristaps 43: #define CHAR(w, x, y, z, a, b) \
44: { NULL, (w), (y), (a), (x), (z), (b), CHARS_CHAR },
45: #define STRING(w, x, y, z, a, b) \
46: { NULL, (w), (y), (a), (x), (z), (b), CHARS_STRING },
47: #define BOTH(w, x, y, z, a, b) \
48: { NULL, (w), (y), (a), (x), (z), (b), CHARS_BOTH },
1.1 kristaps 49:
50: static struct ln lines[LINES_MAX] = {
51: #include "chars.in"
52: };
53:
54: struct tbl {
1.2 kristaps 55: enum chars type;
1.1 kristaps 56: struct ln **htab;
57: };
58:
/* File-local helpers, defined after the public entry points. */
static inline int	  match(const struct ln *ln, const char *p,
				size_t sz, int type);
static const char	 *find(struct tbl *tab, const char *p,
				size_t sz, size_t *rsz, int type);
63:
64:
65: void
66: chars_free(void *arg)
67: {
68: struct tbl *tab;
69:
70: tab = (struct tbl *)arg;
71:
72: free(tab->htab);
73: free(tab);
74: }
75:
76:
77: void *
78: chars_init(enum chars type)
79: {
80: struct tbl *tab;
81: struct ln **htab;
82: struct ln *pp;
83: int i, hash;
84:
85: /*
86: * Constructs a very basic chaining hashtable. The hash routine
87: * is simply the integral value of the first character.
88: * Subsequent entries are chained in the order they're processed
89: * (they're in-line re-ordered during lookup).
90: */
91:
92: if (NULL == (tab = malloc(sizeof(struct tbl))))
93: err(1, "malloc");
1.2 kristaps 94: tab->type = type;
1.1 kristaps 95:
1.2 kristaps 96: htab = calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln **));
1.1 kristaps 97: if (NULL == htab)
98: err(1, "malloc");
99:
100: for (i = 0; i < LINES_MAX; i++) {
1.2 kristaps 101: hash = (int)lines[i].code[0] - PRINT_LO;
1.1 kristaps 102:
103: if (NULL == (pp = htab[hash])) {
104: htab[hash] = &lines[i];
105: continue;
106: }
107:
108: for ( ; pp->next; pp = pp->next)
109: /* Scan ahead. */ ;
110: pp->next = &lines[i];
111: }
112:
113: tab->htab = htab;
114: return(tab);
115: }
116:
117:
118: const char *
119: chars_a2ascii(void *arg, const char *p, size_t sz, size_t *rsz)
120: {
121:
122: return(find((struct tbl *)arg, p, sz, rsz, CHARS_CHAR));
123: }
124:
125:
126: const char *
127: chars_a2res(void *arg, const char *p, size_t sz, size_t *rsz)
128: {
129:
130: return(find((struct tbl *)arg, p, sz, rsz, CHARS_STRING));
131: }
132:
133:
134: static const char *
135: find(struct tbl *tab, const char *p, size_t sz, size_t *rsz, int type)
136: {
137: struct ln *pp, *prev;
138: struct ln **htab;
139: int hash;
140:
141: assert(p);
142: assert(sz > 0);
143:
1.2 kristaps 144: if (p[0] < PRINT_LO || p[0] > PRINT_HI)
1.1 kristaps 145: return(NULL);
146:
147: /*
148: * Lookup the symbol in the symbol hash. See ascii2htab for the
149: * hashtable specs. This dynamically re-orders the hash chain
150: * to optimise for repeat hits.
151: */
152:
1.2 kristaps 153: hash = (int)p[0] - PRINT_LO;
1.1 kristaps 154: htab = tab->htab;
155:
156: if (NULL == (pp = htab[hash]))
157: return(NULL);
158:
159: if (NULL == pp->next) {
160: if ( ! match(pp, p, sz, type))
161: return(NULL);
1.2 kristaps 162:
163: if (CHARS_HTML == tab->type) {
164: *rsz = pp->htmlsz;
165: return(pp->html);
166: }
167: *rsz = pp->asciisz;
168: return(pp->ascii);
1.1 kristaps 169: }
170:
171: for (prev = NULL; pp; pp = pp->next) {
172: if ( ! match(pp, p, sz, type)) {
173: prev = pp;
174: continue;
175: }
176:
177: if (prev) {
178: prev->next = pp->next;
179: pp->next = htab[hash];
180: htab[hash] = pp;
181: }
182:
1.2 kristaps 183: if (CHARS_HTML == tab->type) {
184: *rsz = pp->htmlsz;
185: return(pp->html);
186: }
187: *rsz = pp->asciisz;
188: return(pp->ascii);
1.1 kristaps 189: }
190:
191: return(NULL);
192: }
193:
194:
195: static inline int
196: match(const struct ln *ln, const char *p, size_t sz, int type)
197: {
198:
199: if ( ! (ln->type & type))
200: return(0);
201: if (ln->codesz != sz)
202: return(0);
203: return(0 == strncmp(ln->code, p, sz));
204: }
CVSweb