Annotation of mandoc/html4_strict.c, Revision 1.2
1.2 ! kristaps 1: /* $Id: html4_strict.c,v 1.1 2008/11/23 16:53:18 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the
7: * above copyright notice and this permission notice appear in all
8: * copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11: * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12: * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13: * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14: * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15: * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16: * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17: * PERFORMANCE OF THIS SOFTWARE.
18: */
19: #include <assert.h>
20: #include <ctype.h>
21: #include <err.h>
22: #include <stdlib.h>
23: #include <stdio.h>
24: #include <string.h>
25: #include <time.h>
26:
27: #include "libmdocml.h"
28: #include "private.h"
29:
/* Direction in which a token callback is invoked. */
enum roffd {
	ROFF_ENTER = 0,		/* Token was read from input. */
	ROFF_EXIT		/* Scope opened by the token is closing. */
};
34:
/* Token class; the parser only performs scope handling for layout tokens. */
enum rofftype {
	ROFF_NONE = 0,		/* No scope handling. */
	ROFF_LAYOUT		/* Opens a scope tracked on the node chain. */
};
39:
struct rofftree;

/*
 * Parameter list shared by every token callback:
 *   arg   run-time arguments (carries the debug flags),
 *   out   output buffer,
 *   in    input buffer (its name and line are used in diagnostics),
 *   buf   current input line and sz its length,
 *   pos   offset in buf just past the token,
 *   type  whether the token is being entered or exited,
 *   tree  parse state.
 */
#define	ROFFCALL_ARGS \
	const struct md_args *arg, struct md_mbuf *out, \
	const struct md_rbuf *in, const char *buf, size_t sz, \
	size_t pos, enum roffd type, struct rofftree *tree

/* Token callback; returns non-zero on success, zero on failure. */
typedef	int	(*roffcall)(ROFFCALL_ARGS);

static	int	roff_Dd(ROFFCALL_ARGS);
static	int	roff_Dt(ROFFCALL_ARGS);
static	int	roff_Os(ROFFCALL_ARGS);
static	int	roff_Sh(ROFFCALL_ARGS);
54:
55: struct rofftok {
56: char id;
57: #define ROFF___ 0
58: #define ROFF_Dd 1
59: #define ROFF_Dt 2
60: #define ROFF_Os 3
61: #define ROFF_Sh 4
62: #define ROFF_Max 5
63: char name[2];
64: roffcall cb;
65: enum rofftype type;
66: int flags;
67: #define ROFF_NESTED (1 << 0)
68: };
69:
70: static const struct rofftok tokens[ROFF_Max] = {
71: { ROFF___, "\\\"", NULL, ROFF_NONE, 0 },
72: { ROFF_Dd, "Dd", roff_Dd, ROFF_NONE, 0 },
73: { ROFF_Dt, "Dt", roff_Dt, ROFF_NONE, 0 },
74: { ROFF_Os, "Os", roff_Os, ROFF_LAYOUT, 0 },
75: { ROFF_Sh, "Sh", roff_Sh, ROFF_LAYOUT, 0 },
76: };
77:
/* One open scope; nodes are chained from innermost to outermost. */
struct roffnode {
	int		  tok;		/* Token that opened the scope. */
	struct roffnode	 *parent;	/* Enclosing scope or NULL. */
	/* TODO: line number at acquisition. */
};
83:
/* Parse state carried between input lines. */
struct rofftree {
	struct roffnode	 *last;		/* Innermost open scope or NULL. */
	time_t		  date;		/* Set when Dd is processed. */
	char		  title[256];
	char		  section[256];
	char		  volume[256];
	int		  state;	/* Prelude macros seen so far. */
#define	ROFF_PRELUDE_Os	 (1 << 1)
#define	ROFF_PRELUDE_Dt	 (1 << 2)
#define	ROFF_PRELUDE_Dd	 (1 << 3)
};
95:
/* Token lookup and per-line parsers. */
static	int	rofffind(const char *name);
static	int	roffparse(const struct md_args *args,
			struct md_mbuf *out,
			const struct md_rbuf *in,
			const char *buf, size_t sz,
			struct rofftree *tree);
static	int	textparse(struct md_mbuf *out,
			const struct md_rbuf *in,
			const char *buf, size_t sz,
			const struct rofftree *tree);

/* Debug tracing of scope entry and exit. */
static	void	dbg_enter(const struct md_args *args, int tokid);
static	void	dbg_leave(const struct md_args *args, int tokid);
! 109:
1.1 kristaps 110:
111: int
112: md_exit_html4_strict(const struct md_args *args, struct md_mbuf *out,
113: const struct md_rbuf *in, void *data)
114: {
115: struct rofftree *tree;
116: int error;
117:
118: assert(args);
119: assert(data);
120: tree = (struct rofftree *)data;
121: error = 0;
122:
123: while (tree->last)
124: if ( ! (*tokens[tree->last->tok].cb)
125: (args, error ? NULL : out, in, NULL,
126: 0, 0, ROFF_EXIT, tree))
127: error = 1;
128:
129: free(tree);
130: return(error ? 0 : 1);
131: }
132:
133:
134: int
135: md_init_html4_strict(const struct md_args *args, struct md_mbuf *out,
136: const struct md_rbuf *in, void **data)
137: {
138: struct rofftree *tree;
139:
140: assert(args);
141: assert(in);
142: assert(out);
143: assert(data);
144:
145: /* TODO: write HTML-DTD header. */
146:
147: if (NULL == (tree = calloc(1, sizeof(struct rofftree)))) {
148: warn("malloc");
149: return(0);
150: }
151:
152: *data = tree;
153: return(1);
154: }
155:
156:
157: int
158: md_line_html4_strict(const struct md_args *args, struct md_mbuf *out,
159: const struct md_rbuf *in, const char *buf,
160: size_t sz, void *data)
161: {
162: struct rofftree *tree;
163:
164: assert(args);
165: assert(in);
166: assert(data);
167:
168: tree = (struct rofftree *)data;
169:
170: if (0 == sz) {
171: warnx("%s: blank line (line %zu)", in->name, in->line);
172: return(0);
173: } else if ('.' != *buf)
174: return(textparse(out, in, buf, sz, tree));
175:
176: return(roffparse(args, out, in, buf, sz, tree));
177: }
178:
179:
180: static int
181: textparse(struct md_mbuf *out, const struct md_rbuf *in,
182: const char *buf, size_t sz,
183: const struct rofftree *tree)
184: {
185:
186: assert(tree);
187: assert(out);
188: assert(in);
189: assert(buf);
190: assert(sz > 0);
191:
192: if (NULL == tree->last) {
193: warnx("%s: unexpected text (line %zu)",
194: in->name, in->line);
195: return(0);
196: } else if (NULL == tree->last->parent) {
197: warnx("%s: disallowed text (line %zu)",
198: in->name, in->line);
199: return(0);
200: }
201:
202: if ( ! md_buf_puts(out, buf, sz))
203: return(0);
204: return(md_buf_putstring(out, " "));
205: }
206:
207:
208: static int
209: roffparse(const struct md_args *args, struct md_mbuf *out,
210: const struct md_rbuf *in, const char *buf,
211: size_t sz, struct rofftree *tree)
212: {
213: int tokid, t;
214: size_t pos;
215: struct roffnode *node;
216:
217: assert(args);
218: assert(out);
219: assert(in);
220: assert(buf);
221: assert(sz > 0);
222: assert(tree);
223:
224: /*
225: * Extract the token identifier from the buffer. If there's no
226: * callback for the token (comment, etc.) then exit immediately.
227: * We don't do any error handling (yet), so if the token doesn't
228: * exist, die.
229: */
230:
231: if (3 > sz) {
232: warnx("%s: malformed input (line %zu, col 1)",
233: in->name, in->line);
234: return(0);
235: } else if (ROFF_Max == (tokid = rofffind(buf + 1))) {
236: warnx("%s: unknown token `%c%c' (line %zu, col 1)",
237: in->name, *(buf + 1),
238: *(buf + 2), in->line);
239: return(0);
240: } else if (NULL == tokens[tokid].cb)
241: return(1); /* Skip token. */
242:
243: pos = 3;
244:
245: /*
246: * If this is a non-nestable layout token and we're below a
247: * token of the same type, then recurse upward to the token,
248: * closing out the interim scopes.
249: *
250: * If there's a nested token on the chain, then raise an error
251: * as nested tokens have corresponding "ending" tokens and we're
252: * breaking their scope.
253: */
254:
255: node = NULL;
256:
257: if (ROFF_LAYOUT == tokens[tokid].type &&
258: ! (ROFF_NESTED & tokens[tokid].flags)) {
259: for (node = tree->last; node; node = node->parent) {
260: if (node->tok == tokid)
261: break;
262:
263: /* Don't break nested scope. */
264:
265: if ( ! (ROFF_NESTED & tokens[node->tok].flags))
266: continue;
267: warnx("%s: scope of %s broken by %s "
268: "(line %zu, col %zu)",
269: in->name, tokens[tokid].name,
270: tokens[node->tok].name,
271: in->line, pos);
272: return(0);
273: }
274: }
275: if (node) {
276: assert(ROFF_LAYOUT == tokens[tokid].type);
277: assert( ! (ROFF_NESTED & tokens[tokid].flags));
278: assert(node->tok == tokid);
279:
280: /* Clear up to last scoped token. */
281:
282: do {
283: t = tree->last->tok;
284: if ( ! (*tokens[tree->last->tok].cb)
285: (args, out, in, NULL,
286: 0, 0, ROFF_EXIT, tree))
287: return(0);
288: } while (t != tokid);
289: }
290:
291: /* Proceed with actual token processing. */
292:
293: return((*tokens[tokid].cb)(args, out, in, buf, sz,
294: pos, ROFF_ENTER, tree));
295: }
296:
297:
298: static int
299: rofffind(const char *name)
300: {
301: size_t i;
302:
303: assert(name);
304: /* FIXME: use a table, this is slow but ok for now. */
305: for (i = 0; i < ROFF_Max; i++)
306: if (0 == strncmp(name, tokens[i].name, 2))
307: return(i);
308:
309: return(ROFF_Max);
310: }
311:
312:
313: /* ARGUSED */
314: static int
315: roff_Dd(ROFFCALL_ARGS)
316: {
317:
318: assert(in);
319: assert(tree);
320: assert(arg);
321: assert(out);
322: assert(buf);
323: assert(sz > 0);
324: assert(pos > 0);
325: assert(type == ROFF_ENTER);
326:
327: if (tree->last) {
328: warnx("%s: superfluous prelude (line %zu, col %zu)",
329: in->name, in->line, pos);
330: return(0);
331: }
332:
333: if (0 != tree->state) {
334: warnx("%s: bad manual prelude (line %zu, col %zu)",
335: in->name, in->line, pos);
336: return(1);
337: }
338:
339: /* TODO: parse date from buffer. */
340:
341: tree->date = time(NULL);
342: tree->state |= ROFF_PRELUDE_Dd;
1.2 ! kristaps 343:
! 344: (void)printf("Dd\n");
! 345:
1.1 kristaps 346: return(1);
347: }
348:
349:
350: static int
351: roff_Dt(ROFFCALL_ARGS)
352: {
353:
354: assert(in);
355: assert(tree);
356: assert(arg);
357: assert(out);
358: assert(buf);
359: assert(sz > 0);
360: assert(pos > 0);
361: assert(type == ROFF_ENTER);
362:
363: if (tree->last) {
364: warnx("%s: superfluous prelude (line %zu, col %zu)",
365: in->name, in->line, pos);
366: return(0);
367: }
368:
369: if ( ! (ROFF_PRELUDE_Dd & tree->state) ||
370: (ROFF_PRELUDE_Os & tree->state) ||
371: (ROFF_PRELUDE_Dt & tree->state)) {
372: warnx("%s: bad manual prelude (line %zu, col %zu)",
373: in->name, in->line, pos);
374: return(1);
375: }
376:
377: /* TODO: parse titles from buffer. */
378:
379: tree->state |= ROFF_PRELUDE_Dt;
1.2 ! kristaps 380:
! 381: (void)printf("Dt\n");
! 382:
1.1 kristaps 383: return(1);
384: }
385:
386:
387: static int
388: roff_Os(ROFFCALL_ARGS)
389: {
390: struct roffnode *node;
391:
392: assert(arg);
393: assert(tree);
394: assert(in);
395:
396: if (ROFF_EXIT == type) {
397: assert(tree->last);
398: assert(tree->last->tok == ROFF_Os);
399:
400: /* TODO: flush out ML footer. */
401:
402: node = tree->last;
403: tree->last = node->parent;
404: free(node);
405:
1.2 ! kristaps 406: dbg_leave(arg, ROFF_Os);
! 407:
1.1 kristaps 408: return(1);
409: }
410:
411: assert(out);
412: assert(buf);
413: assert(sz > 0);
414: assert(pos > 0);
415:
416: if (tree->last) {
417: warnx("%s: superfluous prelude (line %zu, col %zu)",
418: in->name, in->line, pos);
419: return(0);
420: }
421:
422: if ((ROFF_PRELUDE_Os & tree->state) ||
423: ! (ROFF_PRELUDE_Dt & tree->state) ||
424: ! (ROFF_PRELUDE_Dd & tree->state)) {
425: warnx("%s: bad manual prelude (line %zu, col %zu)",
426: in->name, in->line, pos);
427: return(1);
428: }
429:
430: node = malloc(sizeof(struct roffnode));
431: if (NULL == node) {
432: warn("malloc");
433: return(0);
434: }
435: node->tok = ROFF_Os;
436: node->parent = NULL;
437:
438: tree->state |= ROFF_PRELUDE_Os;
439: tree->last = node;
440:
1.2 ! kristaps 441: dbg_enter(arg, ROFF_Os);
! 442:
1.1 kristaps 443: return(1);
444: }
445:
446:
447: static int
448: roff_Sh(ROFFCALL_ARGS)
449: {
1.2 ! kristaps 450: struct roffnode *node;
1.1 kristaps 451:
452: assert(arg);
1.2 ! kristaps 453: assert(tree);
! 454: assert(tree->last);
1.1 kristaps 455: assert(in);
1.2 ! kristaps 456:
! 457: if (ROFF_EXIT == type) {
! 458: assert(tree->last->tok == ROFF_Sh);
! 459:
! 460: node = tree->last;
! 461: tree->last = node->parent;
! 462: free(node);
! 463:
! 464: dbg_leave(arg, ROFF_Sh);
! 465:
! 466: return(1);
! 467: }
! 468:
! 469: assert(out);
! 470: assert(buf);
! 471: assert(sz > 0);
! 472: assert(pos > 0);
! 473:
! 474: node = malloc(sizeof(struct roffnode));
! 475: if (NULL == node) {
! 476: warn("malloc");
! 477: return(0);
! 478: }
! 479: node->tok = ROFF_Sh;
! 480: node->parent = tree->last;
! 481:
! 482: tree->last = node;
! 483:
! 484: dbg_enter(arg, ROFF_Sh);
! 485:
1.1 kristaps 486: return(1);
487: }
488:
1.2 ! kristaps 489:
/* Indentation depth of the debug trace; starts out at zero. */
static	int	 dbg_lvl; /* FIXME: de-globalise. */
! 491:
! 492:
! 493: static void
! 494: dbg_enter(const struct md_args *args, int tokid)
! 495: {
! 496: int i;
! 497:
! 498: assert(args);
! 499: if ( ! (args->dbg & MD_DBG_TREE))
! 500: return;
! 501:
! 502: assert(tokid >= 0 && tokid <= ROFF_Max);
! 503:
! 504: for (i = 0; i < dbg_lvl; i++)
! 505: (void)printf(" ");
! 506:
! 507: (void)printf("%s\n", tokens[tokid].name);
! 508:
! 509: if (ROFF_LAYOUT == tokens[tokid].type)
! 510: dbg_lvl++;
! 511: }
! 512:
! 513:
! 514: static void
! 515: dbg_leave(const struct md_args *args, int tokid)
! 516: {
! 517: int i;
! 518:
! 519: assert(args);
! 520: if ( ! (args->dbg & MD_DBG_TREE))
! 521: return;
! 522:
! 523: assert(tokid >= 0 && tokid <= ROFF_Max);
! 524: assert(dbg_lvl > 0);
! 525:
! 526: dbg_lvl--;
! 527: for (i = 0; i < dbg_lvl; i++)
! 528: (void)printf(" ");
! 529:
! 530: (void)printf("%s\n", tokens[tokid].name);
! 531: }
! 532:
CVSweb