Annotation of mandoc/html4_strict.c, Revision 1.6
1.6 ! kristaps 1: /* $Id: html4_strict.c,v 1.5 2008/11/23 23:35:25 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the
7: * above copyright notice and this permission notice appear in all
8: * copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11: * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12: * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13: * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14: * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15: * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16: * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17: * PERFORMANCE OF THIS SOFTWARE.
18: */
19: #include <assert.h>
20: #include <ctype.h>
21: #include <err.h>
22: #include <stdlib.h>
23: #include <stdio.h>
24: #include <string.h>
25: #include <time.h>
26:
27: #include "libmdocml.h"
28: #include "private.h"
29:
/*
 * Capacity of the argv[] vector filled by roffargs().  One slot is
 * needed for the terminating NULL, so a line may carry at most
 * ROFF_MAXARG - 1 words (the macro name itself included).
 */
#define	ROFF_MAXARG	 10
! 31:
/*
 * Direction handed to a token callback: ROFF_ENTER when the token is
 * first processed, ROFF_EXIT when its scope is being closed.
 */
enum	roffd {
	ROFF_ENTER = 0,		/* token encountered on an input line */
	ROFF_EXIT		/* open scope of the token is closed */
};
36:
/*
 * Broad class of a token.  roffparse() uses it to decide whether the
 * token is legal in the current document state (prelude or body).
 */
enum	rofftype {
	ROFF_TITLE,		/* only valid while in the prelude */
	ROFF_COMMENT,		/* ignored by roffparse() */
	ROFF_TEXT,		/* only valid in the body */
	ROFF_LAYOUT		/* only valid in the body; opens a scope */
};
43:
struct	rofftree;

/*
 * Parameter list shared by every token callback (roff_Dd() and
 * friends): run-time arguments, output and input buffers, the words
 * of the current line (NULL when called with ROFF_EXIT), the call
 * direction and the parser state.
 */
#define	ROFFCALL_ARGS							\
	const struct md_args *arg, struct md_mbuf *out,		\
	const struct md_rbuf *in, const char *argv[],			\
	enum roffd type, struct rofftree *tree
1.1 kristaps 50:
51: struct rofftok {
1.3 kristaps 52: int id;
1.1 kristaps 53: char name[2];
1.3 kristaps 54: int (*cb)(ROFFCALL_ARGS);
1.1 kristaps 55: enum rofftype type;
56: int flags;
1.4 kristaps 57: #define ROFF_NESTED (1 << 0)
58: #define ROFF_PARSED (1 << 1)
59: #define ROFF_CALLABLE (1 << 2)
1.6 ! kristaps 60: #define ROFF_QUOTES (1 << 3)
1.1 kristaps 61: };
62:
/*
 * One open scope.  Nodes form a stack linked through `parent'; the top
 * of the stack is rofftree.last.
 */
struct	roffnode {
	int		  tok;		/* ROFF_xx id of the token that opened it */
	struct roffnode	 *parent;	/* enclosing scope, or NULL at the bottom */
	size_t		  line;		/* input line on which it was opened */
};
68:
/*
 * Per-document parser state, allocated by md_init_html4_strict() and
 * released by md_exit_html4_strict().
 */
struct	rofftree {
	struct roffnode	 *last;		/* innermost open scope, or NULL */
	/* The next four members are not referenced by the code in this file yet. */
	time_t		  date;
	char		  title[256];
	char		  section[256];
	char		  volume[256];
	int		  state;	/* bitwise OR of the flags below */
#define	ROFF_PRELUDE	 (1 << 1)	/* still parsing the prelude */
#define	ROFF_PRELUDE_Os	 (1 << 2)	/* `Os' has been seen */
#define	ROFF_PRELUDE_Dt	 (1 << 3)	/* `Dt' has been seen */
#define	ROFF_PRELUDE_Dd	 (1 << 4)	/* `Dd' has been seen */
#define	ROFF_BODY	 (1 << 5)	/* prelude finished, parsing the body */
};
82:
/*
 * Token identifiers.  Each value is the index of the matching entry in
 * tokens[]; ROFF_MAX is the number of entries and doubles as the "not
 * found" result of rofffind().
 */
#define	ROFF___		 0	/* comment */
#define	ROFF_Dd		 1
#define	ROFF_Dt		 2
#define	ROFF_Os		 3
#define	ROFF_Sh		 4
#define	ROFF_An		 5
#define	ROFF_Li		 6
#define	ROFF_MAX	 7
1.3 kristaps 91:
/* Token callbacks, referenced from tokens[]. */
static	int		  roff_Dd(ROFFCALL_ARGS);
static	int		  roff_Dt(ROFFCALL_ARGS);
static	int		  roff_Os(ROFFCALL_ARGS);
static	int		  roff_Sh(ROFFCALL_ARGS);
static	int		  roff_An(ROFFCALL_ARGS);
static	int		  roff_Li(ROFFCALL_ARGS);

/* Scope stack maintenance. */
static	struct roffnode	 *roffnode_new(int tokid, size_t line,
				struct rofftree *tree);
static	void		  roffnode_free(int tokid, struct rofftree *tree);

/* Line parsing. */
static	int		  rofffind(const char *name);
static	int		  roffargs(int tok, char *buf, char **argv);
static	int		  roffparse(const struct md_args *args,
				struct md_mbuf *out,
				const struct md_rbuf *in,
				char *buf, size_t sz, struct rofftree *tree);
static	int		  textparse(struct md_mbuf *out,
				const struct md_rbuf *in,
				const char *buf, size_t sz,
				const struct rofftree *tree);

/* Debug tracing of the token tree. */
static	void		  dbg_enter(const struct md_args *args, int tokid);
static	void		  dbg_leave(const struct md_args *args, int tokid);
116:
1.1 kristaps 117:
1.6 ! kristaps 118: static const struct rofftok tokens[ROFF_MAX] =
1.3 kristaps 119: {
120: { ROFF___, "\\\"", NULL, ROFF_COMMENT, 0 },
121: { ROFF_Dd, "Dd", roff_Dd, ROFF_TITLE, 0 },
122: { ROFF_Dt, "Dt", roff_Dt, ROFF_TITLE, 0 },
123: { ROFF_Os, "Os", roff_Os, ROFF_TITLE, 0 },
124: { ROFF_Sh, "Sh", roff_Sh, ROFF_LAYOUT, 0 },
125: { ROFF_An, "An", roff_An, ROFF_TEXT, ROFF_PARSED },
126: { ROFF_Li, "Li", roff_Li, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE },
127: };
128:
129:
1.1 kristaps 130: int
131: md_exit_html4_strict(const struct md_args *args, struct md_mbuf *out,
1.3 kristaps 132: const struct md_rbuf *in, int error, void *data)
1.1 kristaps 133: {
134: struct rofftree *tree;
135:
136: assert(args);
137: assert(data);
138: tree = (struct rofftree *)data;
139:
1.3 kristaps 140: if (-1 == error)
141: out = NULL;
142:
143: /* LINTED */
1.1 kristaps 144: while (tree->last)
1.3 kristaps 145: if ( ! (*tokens[tree->last->tok].cb)(args, out, in,
1.6 ! kristaps 146: NULL, ROFF_EXIT, tree))
1.3 kristaps 147: out = NULL;
148:
149: if (out && (ROFF_PRELUDE & tree->state)) {
150: warnx("%s: prelude never finished", in->name);
151: error = 1;
152: }
1.1 kristaps 153:
154: free(tree);
1.3 kristaps 155:
1.1 kristaps 156: return(error ? 0 : 1);
157: }
158:
159:
160: int
161: md_init_html4_strict(const struct md_args *args, struct md_mbuf *out,
162: const struct md_rbuf *in, void **data)
163: {
164: struct rofftree *tree;
165:
166: assert(args);
167: assert(in);
168: assert(out);
169: assert(data);
170:
171: /* TODO: write HTML-DTD header. */
172:
173: if (NULL == (tree = calloc(1, sizeof(struct rofftree)))) {
174: warn("malloc");
175: return(0);
176: }
177:
1.3 kristaps 178: tree->state = ROFF_PRELUDE;
179:
1.1 kristaps 180: *data = tree;
181: return(1);
182: }
183:
184:
185: int
186: md_line_html4_strict(const struct md_args *args, struct md_mbuf *out,
1.6 ! kristaps 187: const struct md_rbuf *in, char *buf,
1.1 kristaps 188: size_t sz, void *data)
189: {
190: struct rofftree *tree;
191:
192: assert(args);
193: assert(in);
194: assert(data);
195:
196: tree = (struct rofftree *)data;
197:
198: if (0 == sz) {
199: warnx("%s: blank line (line %zu)", in->name, in->line);
200: return(0);
201: } else if ('.' != *buf)
202: return(textparse(out, in, buf, sz, tree));
203:
204: return(roffparse(args, out, in, buf, sz, tree));
205: }
206:
207:
208: static int
209: textparse(struct md_mbuf *out, const struct md_rbuf *in,
210: const char *buf, size_t sz,
211: const struct rofftree *tree)
212: {
213:
214: assert(tree);
215: assert(out);
216: assert(in);
217: assert(buf);
218: assert(sz > 0);
219:
220: if (NULL == tree->last) {
221: warnx("%s: unexpected text (line %zu)",
222: in->name, in->line);
223: return(0);
224: } else if (NULL == tree->last->parent) {
225: warnx("%s: disallowed text (line %zu)",
226: in->name, in->line);
227: return(0);
228: }
229:
230: if ( ! md_buf_puts(out, buf, sz))
231: return(0);
232: return(md_buf_putstring(out, " "));
233: }
234:
235:
236: static int
1.6 ! kristaps 237: roffargs(int tok, char *buf, char **argv)
! 238: {
! 239: int i;
! 240:
! 241: (void)tok;/* FIXME: quotable strings? */
! 242:
! 243: assert(tok >= 0 && tok < ROFF_MAX);
! 244: assert('.' == *buf);
! 245:
! 246: /* LINTED */
! 247: for (i = 0; *buf && i < ROFF_MAXARG; i++) {
! 248: argv[i] = buf++;
! 249: while (*buf && ! isspace(*buf))
! 250: buf++;
! 251: if (NULL == *buf) {
! 252: continue;
! 253: }
! 254: *buf++ = 0;
! 255: while (*buf && isspace(*buf))
! 256: buf++;
! 257: }
! 258:
! 259: assert(i > 0);
! 260: if (i < ROFF_MAXARG)
! 261: argv[i] = NULL;
! 262:
! 263: return(ROFF_MAXARG > i);
! 264: }
! 265:
! 266:
! 267: static int
1.1 kristaps 268: roffparse(const struct md_args *args, struct md_mbuf *out,
1.6 ! kristaps 269: const struct md_rbuf *in, char *buf, size_t sz,
! 270: struct rofftree *tree)
1.1 kristaps 271: {
1.6 ! kristaps 272: int tok, t;
1.1 kristaps 273: struct roffnode *node;
1.6 ! kristaps 274: char *argv[ROFF_MAXARG];
1.1 kristaps 275:
276: assert(sz > 0);
277:
278: /*
279: * Extract the token identifier from the buffer. If there's no
280: * callback for the token (comment, etc.) then exit immediately.
281: * We don't do any error handling (yet), so if the token doesn't
282: * exist, die.
283: */
284:
285: if (3 > sz) {
1.3 kristaps 286: warnx("%s: malformed line (line %zu)",
1.1 kristaps 287: in->name, in->line);
288: return(0);
1.6 ! kristaps 289: } else if (ROFF_MAX == (tok = rofffind(buf + 1))) {
1.3 kristaps 290: warnx("%s: unknown line token `%c%c' (line %zu)",
1.1 kristaps 291: in->name, *(buf + 1),
292: *(buf + 2), in->line);
293: return(0);
1.6 ! kristaps 294: } else if (ROFF_COMMENT == tokens[tok].type)
! 295: /* Ignore comment tokens. */
! 296: return(1);
! 297:
! 298: if ( ! roffargs(tok, buf, argv)) {
! 299: warnx("%s: too many arguments to `%s' (line %zu)",
! 300: in->name, tokens[tok].name, in->line);
! 301: return(0);
! 302: }
1.3 kristaps 303:
304: /* Domain cross-contamination (and sanity) checks. */
305:
1.6 ! kristaps 306: switch (tokens[tok].type) {
1.3 kristaps 307: case (ROFF_TITLE):
308: if (ROFF_PRELUDE & tree->state) {
309: assert( ! (ROFF_BODY & tree->state));
310: break;
311: }
312: assert(ROFF_BODY & tree->state);
313: warnx("%s: prelude token `%s' in body (line %zu)",
1.6 ! kristaps 314: in->name, tokens[tok].name, in->line);
1.3 kristaps 315: return(0);
316: case (ROFF_LAYOUT):
317: /* FALLTHROUGH */
318: case (ROFF_TEXT):
319: if (ROFF_BODY & tree->state) {
320: assert( ! (ROFF_PRELUDE & tree->state));
321: break;
322: }
323: assert(ROFF_PRELUDE & tree->state);
1.4 kristaps 324: warnx("%s: body token `%s' in prelude (line %zu)",
1.6 ! kristaps 325: in->name, tokens[tok].name, in->line);
1.3 kristaps 326: return(0);
1.4 kristaps 327: case (ROFF_COMMENT):
328: return(1);
1.3 kristaps 329: default:
1.4 kristaps 330: abort();
1.3 kristaps 331: }
332:
333: /*
334: * Text-domain checks.
335: */
1.1 kristaps 336:
1.6 ! kristaps 337: if (ROFF_TEXT == tokens[tok].type &&
! 338: ! (ROFF_PARSED & tokens[tok].flags)) {
1.3 kristaps 339: warnx("%s: text token `%s' not callable (line %zu)",
1.6 ! kristaps 340: in->name, tokens[tok].name, in->line);
1.3 kristaps 341: return(0);
342: }
1.1 kristaps 343:
344: /*
345: * If this is a non-nestable layout token and we're below a
346: * token of the same type, then recurse upward to the token,
347: * closing out the interim scopes.
348: *
349: * If there's a nested token on the chain, then raise an error
350: * as nested tokens have corresponding "ending" tokens and we're
351: * breaking their scope.
352: */
353:
354: node = NULL;
355:
1.6 ! kristaps 356: if (ROFF_LAYOUT == tokens[tok].type &&
! 357: ! (ROFF_NESTED & tokens[tok].flags)) {
1.1 kristaps 358: for (node = tree->last; node; node = node->parent) {
1.6 ! kristaps 359: if (node->tok == tok)
1.1 kristaps 360: break;
361:
362: /* Don't break nested scope. */
363:
364: if ( ! (ROFF_NESTED & tokens[node->tok].flags))
365: continue;
1.3 kristaps 366: warnx("%s: scope of %s (line %zu) broken by "
367: "%s (line %zu)", in->name,
1.6 ! kristaps 368: tokens[tok].name,
1.3 kristaps 369: node->line,
1.1 kristaps 370: tokens[node->tok].name,
1.3 kristaps 371: in->line);
1.1 kristaps 372: return(0);
373: }
374: }
1.3 kristaps 375:
1.1 kristaps 376: if (node) {
1.6 ! kristaps 377: assert(ROFF_LAYOUT == tokens[tok].type);
! 378: assert( ! (ROFF_NESTED & tokens[tok].flags));
! 379: assert(node->tok == tok);
1.1 kristaps 380:
381: /* Clear up to last scoped token. */
382:
1.3 kristaps 383: /* LINTED */
1.1 kristaps 384: do {
385: t = tree->last->tok;
386: if ( ! (*tokens[tree->last->tok].cb)
387: (args, out, in, NULL,
1.6 ! kristaps 388: ROFF_EXIT, tree))
1.1 kristaps 389: return(0);
1.6 ! kristaps 390: } while (t != tok);
1.1 kristaps 391: }
392:
393: /* Proceed with actual token processing. */
394:
1.6 ! kristaps 395: return((*tokens[tok].cb)(args, out, in, (const char **)argv,
! 396: ROFF_ENTER, tree));
1.1 kristaps 397: }
398:
399:
400: static int
401: rofffind(const char *name)
402: {
403: size_t i;
404:
405: assert(name);
406: /* FIXME: use a table, this is slow but ok for now. */
1.3 kristaps 407:
408: /* LINTED */
1.6 ! kristaps 409: for (i = 0; i < ROFF_MAX; i++)
1.3 kristaps 410: /* LINTED */
1.1 kristaps 411: if (0 == strncmp(name, tokens[i].name, 2))
1.3 kristaps 412: return((int)i);
1.1 kristaps 413:
1.6 ! kristaps 414: return(ROFF_MAX);
1.1 kristaps 415: }
416:
417:
1.3 kristaps 418: static struct roffnode *
419: roffnode_new(int tokid, size_t line, struct rofftree *tree)
420: {
421: struct roffnode *p;
422:
423: if (NULL == (p = malloc(sizeof(struct roffnode)))) {
424: warn("malloc");
425: return(NULL);
426: }
427:
428: p->line = line;
429: p->tok = tokid;
430: p->parent = tree->last;
431: tree->last = p;
432: return(p);
433: }
434:
435:
436: static void
437: roffnode_free(int tokid, struct rofftree *tree)
438: {
439: struct roffnode *p;
440:
441: assert(tree->last);
442: assert(tree->last->tok == tokid);
443:
444: p = tree->last;
445: tree->last = tree->last->parent;
446: free(p);
447: }
448:
449:
/*
 * Current nesting depth of the debug trace: raised by dbg_enter() and
 * lowered by dbg_leave() for layout tokens.  Starts at zero.
 */
static	int	 dbg_lvl;
1.3 kristaps 451:
452:
453: static void
454: dbg_enter(const struct md_args *args, int tokid)
455: {
456: int i;
1.4 kristaps 457: static char buf[72];
1.3 kristaps 458:
459: assert(args);
460: if ( ! (args->dbg & MD_DBG_TREE))
461: return;
1.6 ! kristaps 462: assert(tokid >= 0 && tokid <= ROFF_MAX);
1.3 kristaps 463:
1.4 kristaps 464: buf[0] = 0;
465:
466: switch (tokens[tokid].type) {
467: case (ROFF_LAYOUT):
468: /* FALLTHROUGH */
469: case (ROFF_TEXT):
470: (void)strlcat(buf, "body: ", sizeof(buf));
471: break;
472: case (ROFF_TITLE):
473: (void)strlcat(buf, "prelude: ", sizeof(buf));
474: break;
475: default:
476: abort();
477: }
1.3 kristaps 478:
479: /* LINTED */
480: for (i = 0; i < dbg_lvl; i++)
1.4 kristaps 481: (void)strlcat(buf, " ", sizeof(buf));
482:
483: (void)strlcat(buf, tokens[tokid].name, sizeof(buf));
1.3 kristaps 484:
1.4 kristaps 485: (void)printf("%s\n", buf);
1.3 kristaps 486:
487: if (ROFF_LAYOUT == tokens[tokid].type)
488: dbg_lvl++;
489: }
490:
491:
492: static void
493: dbg_leave(const struct md_args *args, int tokid)
494: {
495: assert(args);
496: if ( ! (args->dbg & MD_DBG_TREE))
497: return;
498: if (ROFF_LAYOUT != tokens[tokid].type)
499: return;
500:
1.6 ! kristaps 501: assert(tokid >= 0 && tokid <= ROFF_MAX);
1.3 kristaps 502: assert(dbg_lvl > 0);
503: dbg_lvl--;
504: }
505:
506:
1.6 ! kristaps 507: /* ARGSUSED */
1.1 kristaps 508: static int
509: roff_Dd(ROFFCALL_ARGS)
510: {
511:
1.5 kristaps 512: dbg_enter(arg, ROFF_Dd);
513:
1.3 kristaps 514: assert(ROFF_PRELUDE & tree->state);
515: if (ROFF_PRELUDE_Dt & tree->state ||
516: ROFF_PRELUDE_Dd & tree->state) {
1.4 kristaps 517: warnx("%s: prelude `Dd' out-of-order (line %zu)",
1.3 kristaps 518: in->name, in->line);
1.1 kristaps 519: return(0);
520: }
521:
1.3 kristaps 522: assert(NULL == tree->last);
1.1 kristaps 523: tree->state |= ROFF_PRELUDE_Dd;
1.2 kristaps 524:
1.5 kristaps 525: dbg_leave(arg, ROFF_Dd);
526:
1.1 kristaps 527: return(1);
528: }
529:
530:
1.6 ! kristaps 531: /* ARGSUSED */
1.1 kristaps 532: static int
533: roff_Dt(ROFFCALL_ARGS)
534: {
535:
1.5 kristaps 536: dbg_enter(arg, ROFF_Dt);
537:
1.3 kristaps 538: assert(ROFF_PRELUDE & tree->state);
1.1 kristaps 539: if ( ! (ROFF_PRELUDE_Dd & tree->state) ||
540: (ROFF_PRELUDE_Dt & tree->state)) {
1.4 kristaps 541: warnx("%s: prelude `Dt' out-of-order (line %zu)",
1.3 kristaps 542: in->name, in->line);
543: return(0);
1.1 kristaps 544: }
545:
1.3 kristaps 546: assert(NULL == tree->last);
1.1 kristaps 547: tree->state |= ROFF_PRELUDE_Dt;
1.2 kristaps 548:
1.5 kristaps 549: dbg_leave(arg, ROFF_Dt);
550:
1.1 kristaps 551: return(1);
552: }
553:
554:
1.6 ! kristaps 555: /* ARGSUSED */
1.1 kristaps 556: static int
557: roff_Os(ROFFCALL_ARGS)
558: {
559:
560: if (ROFF_EXIT == type) {
1.3 kristaps 561: roffnode_free(ROFF_Os, tree);
1.2 kristaps 562: dbg_leave(arg, ROFF_Os);
1.1 kristaps 563: return(1);
564: }
565:
1.5 kristaps 566: dbg_enter(arg, ROFF_Os);
567:
1.3 kristaps 568: assert(ROFF_PRELUDE & tree->state);
569: if ( ! (ROFF_PRELUDE_Dt & tree->state) ||
570: ! (ROFF_PRELUDE_Dd & tree->state)) {
1.4 kristaps 571: warnx("%s: prelude `Os' out-of-order (line %zu)",
1.3 kristaps 572: in->name, in->line);
1.1 kristaps 573: return(0);
574: }
575:
1.3 kristaps 576: assert(NULL == tree->last);
577: if (NULL == roffnode_new(ROFF_Os, in->line, tree))
1.1 kristaps 578: return(0);
579:
580: tree->state |= ROFF_PRELUDE_Os;
1.3 kristaps 581: tree->state &= ~ROFF_PRELUDE;
582: tree->state |= ROFF_BODY;
1.1 kristaps 583:
584: return(1);
585: }
586:
587:
1.6 ! kristaps 588: /* ARGSUSED */
1.3 kristaps 589: static int
1.1 kristaps 590: roff_Sh(ROFFCALL_ARGS)
591: {
1.2 kristaps 592:
593: if (ROFF_EXIT == type) {
1.3 kristaps 594: roffnode_free(ROFF_Sh, tree);
1.2 kristaps 595: dbg_leave(arg, ROFF_Sh);
596: return(1);
597: }
598:
1.5 kristaps 599: dbg_enter(arg, ROFF_Sh);
600:
1.3 kristaps 601: if (NULL == roffnode_new(ROFF_Sh, in->line, tree))
1.2 kristaps 602: return(0);
603:
1.5 kristaps 604: dbg_leave(arg, ROFF_Li);
605:
1.1 kristaps 606: return(1);
607: }
608:
1.2 kristaps 609:
1.6 ! kristaps 610: /* ARGSUSED */
1.3 kristaps 611: static int
612: roff_Li(ROFFCALL_ARGS)
1.2 kristaps 613: {
614:
1.5 kristaps 615: dbg_enter(arg, ROFF_Li);
616: dbg_leave(arg, ROFF_Li);
617:
1.3 kristaps 618: return(1);
1.2 kristaps 619: }
620:
621:
#if 0
/*
 * Disabled sketch of an argument parser.  It is never compiled and
 * refers to helpers that do not exist in this file.
 */
static int
parse_args(void)
{
	skip_whitespace();

	while (pos < sz) {

		if (is_arg) {
		} else if (parsable) {
			if (is_callable_token()) {
			}
		}

		skip_whitespace();
	}
}
#endif
640:
641:
1.6 ! kristaps 642: /* ARGSUSED */
1.3 kristaps 643: static int
644: roff_An(ROFFCALL_ARGS)
1.2 kristaps 645: {
646:
1.5 kristaps 647: dbg_enter(arg, ROFF_An);
648:
649: /* Do our ML stuff. */
650:
651: /*parse_args();*/
652:
653: /* Do our trailing whitespace stuff. */
654:
655: dbg_leave(arg, ROFF_An);
656:
1.3 kristaps 657: return(1);
1.2 kristaps 658: }
CVSweb