Annotation of mandoc/html4_strict.c, Revision 1.3
1.3 ! kristaps 1: /* $Id: html4_strict.c,v 1.2 2008/11/23 19:10:03 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the
7: * above copyright notice and this permission notice appear in all
8: * copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11: * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12: * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13: * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14: * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15: * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16: * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17: * PERFORMANCE OF THIS SOFTWARE.
18: */
19: #include <assert.h>
20: #include <ctype.h>
21: #include <err.h>
22: #include <stdlib.h>
23: #include <stdio.h>
24: #include <string.h>
25: #include <time.h>
26:
27: #include "libmdocml.h"
28: #include "private.h"
29:
/*
 * Direction in which a token callback is invoked: ROFF_ENTER when the
 * token is encountered on an input line, ROFF_EXIT when its scope is
 * being closed out.
 */
enum	roffd {
	ROFF_ENTER = 0,
	ROFF_EXIT = 1
};
34:
/*
 * Domain of a token.  The parser uses this to decide whether a token is
 * legal in the prelude (ROFF_TITLE) or in the body (ROFF_TEXT and
 * ROFF_LAYOUT); ROFF_COMMENT tokens are accepted and ignored.
 */
enum	rofftype {
	ROFF_TITLE = 0,
	ROFF_COMMENT = 1,
	ROFF_TEXT = 2,
	ROFF_LAYOUT = 3
};
41:
struct	rofftree;

/*
 * Parameter list shared by every token callback:
 *   arg   parser arguments (passed on to the debug helpers)
 *   out   output buffer
 *   in    input buffer; callbacks read in->name and in->line
 *   buf   current line, or NULL when invoked with ROFF_EXIT
 *   sz    length of buf (0 on ROFF_EXIT)
 *   pos   offset into buf where roffparse() stopped (0 on ROFF_EXIT)
 *   type  ROFF_ENTER or ROFF_EXIT
 *   tree  parse state
 */
#define	ROFFCALL_ARGS \
	const struct md_args *arg, \
	struct md_mbuf *out, \
	const struct md_rbuf *in, \
	const char *buf, \
	size_t sz, \
	size_t pos, \
	enum roffd type, \
	struct rofftree *tree
1.1 kristaps 50:
51: struct rofftok {
1.3 ! kristaps 52: int id;
1.1 kristaps 53: char name[2];
1.3 ! kristaps 54: int (*cb)(ROFFCALL_ARGS);
1.1 kristaps 55: enum rofftype type;
56: int flags;
1.3 ! kristaps 57: #define ROFF_NESTED (1 << 0) /* FIXME: test. */
! 58: #define ROFF_PARSED (1 << 1) /* FIXME: test. */
! 59: #define ROFF_CALLABLE (1 << 2) /* FIXME: test. */
1.1 kristaps 60: };
61:
/*
 * One open scope.  Nodes form a singly-linked stack through `parent';
 * the top of the stack is rofftree.last.
 */
struct	roffnode {
	int		  tok;		/* ROFF_xxx id of the opening token */
	struct roffnode	 *parent;	/* enclosing scope, or NULL */
	size_t		  line;		/* input line that opened the scope */
};
67:
/*
 * Per-document parse state, allocated by md_init_html4_strict() and
 * released by md_exit_html4_strict().
 *
 * NOTE(review): date, title, section and volume are not referenced by
 * the code visible in this file; presumably they are meant to hold the
 * prelude values -- confirm before relying on them.
 */
struct	rofftree {
	struct roffnode	 *last;		/* innermost open scope, or NULL */
	time_t		  date;
	char		  title[256];
	char		  section[256];
	char		  volume[256];
	int		  state;	/* bitmask of the flags below */
#define	ROFF_PRELUDE	 (1 << 1)	/* still parsing the prelude */
#define	ROFF_PRELUDE_Os	 (1 << 2)	/* .Os has been seen */
#define	ROFF_PRELUDE_Dt	 (1 << 3)	/* .Dt has been seen */
#define	ROFF_PRELUDE_Dd	 (1 << 4)	/* .Dd has been seen */
#define	ROFF_BODY	 (1 << 5)	/* prelude done, parsing body */
};
81:
/*
 * Token identifiers.  Each value is also the token's index in the
 * tokens[] table; ROFF_Max is the number of tokens and doubles as the
 * "not found" result of rofffind().
 */
#define	ROFF___		 0	/* comment */
#define	ROFF_Dd		 1
#define	ROFF_Dt		 2
#define	ROFF_Os		 3
#define	ROFF_Sh		 4
#define	ROFF_An		 5
#define	ROFF_Li		 6
#define	ROFF_Max	 7
! 90:
/* Token callbacks (see ROFFCALL_ARGS for the parameter list). */
static	int		  roff_Dd(ROFFCALL_ARGS);
static	int		  roff_Dt(ROFFCALL_ARGS);
static	int		  roff_Os(ROFFCALL_ARGS);
static	int		  roff_Sh(ROFFCALL_ARGS);
static	int		  roff_An(ROFFCALL_ARGS);
static	int		  roff_Li(ROFFCALL_ARGS);

/* Scope-stack maintenance. */
static	struct roffnode	 *roffnode_new(int tokid, size_t line,
				struct rofftree *tree);
static	void		  roffnode_free(int tokid, struct rofftree *tree);

/* Line parsing. */
static	int		  rofffind(const char *name);
static	int		  roffparse(const struct md_args *args,
				struct md_mbuf *out,
				const struct md_rbuf *in,
				const char *buf, size_t sz,
				struct rofftree *tree);
static	int		  textparse(struct md_mbuf *out,
				const struct md_rbuf *in,
				const char *buf, size_t sz,
				const struct rofftree *tree);

/* Debug tracing of the token tree. */
static	void		  dbg_enter(const struct md_args *args, int tokid);
static	void		  dbg_leave(const struct md_args *args, int tokid);
115:
1.1 kristaps 116:
1.3 ! kristaps 117: static const struct rofftok tokens[ROFF_Max] =
! 118: {
! 119: { ROFF___, "\\\"", NULL, ROFF_COMMENT, 0 },
! 120: { ROFF_Dd, "Dd", roff_Dd, ROFF_TITLE, 0 },
! 121: { ROFF_Dt, "Dt", roff_Dt, ROFF_TITLE, 0 },
! 122: { ROFF_Os, "Os", roff_Os, ROFF_TITLE, 0 },
! 123: { ROFF_Sh, "Sh", roff_Sh, ROFF_LAYOUT, 0 },
! 124: { ROFF_An, "An", roff_An, ROFF_TEXT, ROFF_PARSED },
! 125: { ROFF_Li, "Li", roff_Li, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE },
! 126: };
! 127:
! 128:
1.1 kristaps 129: int
130: md_exit_html4_strict(const struct md_args *args, struct md_mbuf *out,
1.3 ! kristaps 131: const struct md_rbuf *in, int error, void *data)
1.1 kristaps 132: {
133: struct rofftree *tree;
134:
135: assert(args);
136: assert(data);
137: tree = (struct rofftree *)data;
138:
1.3 ! kristaps 139: if (-1 == error)
! 140: out = NULL;
! 141:
! 142: /* LINTED */
1.1 kristaps 143: while (tree->last)
1.3 ! kristaps 144: if ( ! (*tokens[tree->last->tok].cb)(args, out, in,
! 145: NULL, 0, 0, ROFF_EXIT, tree))
! 146: out = NULL;
! 147:
! 148: if (out && (ROFF_PRELUDE & tree->state)) {
! 149: warnx("%s: prelude never finished", in->name);
! 150: error = 1;
! 151: }
1.1 kristaps 152:
153: free(tree);
1.3 ! kristaps 154:
1.1 kristaps 155: return(error ? 0 : 1);
156: }
157:
158:
159: int
160: md_init_html4_strict(const struct md_args *args, struct md_mbuf *out,
161: const struct md_rbuf *in, void **data)
162: {
163: struct rofftree *tree;
164:
165: assert(args);
166: assert(in);
167: assert(out);
168: assert(data);
169:
170: /* TODO: write HTML-DTD header. */
171:
172: if (NULL == (tree = calloc(1, sizeof(struct rofftree)))) {
173: warn("malloc");
174: return(0);
175: }
176:
1.3 ! kristaps 177: tree->state = ROFF_PRELUDE;
! 178:
1.1 kristaps 179: *data = tree;
180: return(1);
181: }
182:
183:
184: int
185: md_line_html4_strict(const struct md_args *args, struct md_mbuf *out,
186: const struct md_rbuf *in, const char *buf,
187: size_t sz, void *data)
188: {
189: struct rofftree *tree;
190:
191: assert(args);
192: assert(in);
193: assert(data);
194:
195: tree = (struct rofftree *)data;
196:
197: if (0 == sz) {
198: warnx("%s: blank line (line %zu)", in->name, in->line);
199: return(0);
200: } else if ('.' != *buf)
201: return(textparse(out, in, buf, sz, tree));
202:
203: return(roffparse(args, out, in, buf, sz, tree));
204: }
205:
206:
207: static int
208: textparse(struct md_mbuf *out, const struct md_rbuf *in,
209: const char *buf, size_t sz,
210: const struct rofftree *tree)
211: {
212:
213: assert(tree);
214: assert(out);
215: assert(in);
216: assert(buf);
217: assert(sz > 0);
218:
219: if (NULL == tree->last) {
220: warnx("%s: unexpected text (line %zu)",
221: in->name, in->line);
222: return(0);
223: } else if (NULL == tree->last->parent) {
224: warnx("%s: disallowed text (line %zu)",
225: in->name, in->line);
226: return(0);
227: }
228:
229: if ( ! md_buf_puts(out, buf, sz))
230: return(0);
231: return(md_buf_putstring(out, " "));
232: }
233:
234:
235: static int
236: roffparse(const struct md_args *args, struct md_mbuf *out,
237: const struct md_rbuf *in, const char *buf,
238: size_t sz, struct rofftree *tree)
239: {
240: int tokid, t;
241: size_t pos;
242: struct roffnode *node;
243:
244: assert(args);
245: assert(out);
246: assert(in);
247: assert(buf);
248: assert(sz > 0);
249: assert(tree);
250:
251: /*
252: * Extract the token identifier from the buffer. If there's no
253: * callback for the token (comment, etc.) then exit immediately.
254: * We don't do any error handling (yet), so if the token doesn't
255: * exist, die.
256: */
257:
258: if (3 > sz) {
1.3 ! kristaps 259: warnx("%s: malformed line (line %zu)",
1.1 kristaps 260: in->name, in->line);
261: return(0);
262: } else if (ROFF_Max == (tokid = rofffind(buf + 1))) {
1.3 ! kristaps 263: warnx("%s: unknown line token `%c%c' (line %zu)",
1.1 kristaps 264: in->name, *(buf + 1),
265: *(buf + 2), in->line);
266: return(0);
1.3 ! kristaps 267: }
! 268:
! 269: /* Domain cross-contamination (and sanity) checks. */
! 270:
! 271: switch (tokens[tokid].type) {
! 272: case (ROFF_TITLE):
! 273: if (ROFF_PRELUDE & tree->state) {
! 274: assert( ! (ROFF_BODY & tree->state));
! 275: break;
! 276: }
! 277: assert(ROFF_BODY & tree->state);
! 278: warnx("%s: prelude token `%s' in body (line %zu)",
! 279: in->name, tokens[tokid].name, in->line);
! 280: return(0);
! 281: case (ROFF_LAYOUT):
! 282: /* FALLTHROUGH */
! 283: case (ROFF_TEXT):
! 284: if (ROFF_BODY & tree->state) {
! 285: assert( ! (ROFF_PRELUDE & tree->state));
! 286: break;
! 287: }
! 288: assert(ROFF_PRELUDE & tree->state);
! 289: warnx("%s: text token `%s' in prelude (line %zu)",
! 290: in->name, tokens[tokid].name, in->line);
! 291: return(0);
! 292: default:
! 293: return(1);
! 294: }
! 295:
! 296: /*
! 297: * Text-domain checks.
! 298: */
1.1 kristaps 299:
1.3 ! kristaps 300: if (ROFF_TEXT == tokens[tokid].type &&
! 301: ! (ROFF_PARSED & tokens[tokid].flags)) {
! 302: warnx("%s: text token `%s' not callable (line %zu)",
! 303: in->name, tokens[tokid].name, in->line);
! 304: return(0);
! 305: }
1.1 kristaps 306:
307: /*
308: * If this is a non-nestable layout token and we're below a
309: * token of the same type, then recurse upward to the token,
310: * closing out the interim scopes.
311: *
312: * If there's a nested token on the chain, then raise an error
313: * as nested tokens have corresponding "ending" tokens and we're
314: * breaking their scope.
315: */
316:
317: node = NULL;
1.3 ! kristaps 318: pos = 3;
1.1 kristaps 319:
320: if (ROFF_LAYOUT == tokens[tokid].type &&
321: ! (ROFF_NESTED & tokens[tokid].flags)) {
322: for (node = tree->last; node; node = node->parent) {
323: if (node->tok == tokid)
324: break;
325:
326: /* Don't break nested scope. */
327:
328: if ( ! (ROFF_NESTED & tokens[node->tok].flags))
329: continue;
1.3 ! kristaps 330: warnx("%s: scope of %s (line %zu) broken by "
! 331: "%s (line %zu)", in->name,
! 332: tokens[tokid].name,
! 333: node->line,
1.1 kristaps 334: tokens[node->tok].name,
1.3 ! kristaps 335: in->line);
1.1 kristaps 336: return(0);
337: }
338: }
1.3 ! kristaps 339:
1.1 kristaps 340: if (node) {
341: assert(ROFF_LAYOUT == tokens[tokid].type);
342: assert( ! (ROFF_NESTED & tokens[tokid].flags));
343: assert(node->tok == tokid);
344:
345: /* Clear up to last scoped token. */
346:
1.3 ! kristaps 347: /* LINTED */
1.1 kristaps 348: do {
349: t = tree->last->tok;
350: if ( ! (*tokens[tree->last->tok].cb)
351: (args, out, in, NULL,
352: 0, 0, ROFF_EXIT, tree))
353: return(0);
354: } while (t != tokid);
355: }
356:
357: /* Proceed with actual token processing. */
358:
359: return((*tokens[tokid].cb)(args, out, in, buf, sz,
360: pos, ROFF_ENTER, tree));
361: }
362:
363:
364: static int
365: rofffind(const char *name)
366: {
367: size_t i;
368:
369: assert(name);
370: /* FIXME: use a table, this is slow but ok for now. */
1.3 ! kristaps 371:
! 372: /* LINTED */
1.1 kristaps 373: for (i = 0; i < ROFF_Max; i++)
1.3 ! kristaps 374: /* LINTED */
1.1 kristaps 375: if (0 == strncmp(name, tokens[i].name, 2))
1.3 ! kristaps 376: return((int)i);
1.1 kristaps 377:
378: return(ROFF_Max);
379: }
380:
381:
1.3 ! kristaps 382: static struct roffnode *
! 383: roffnode_new(int tokid, size_t line, struct rofftree *tree)
! 384: {
! 385: struct roffnode *p;
! 386:
! 387: if (NULL == (p = malloc(sizeof(struct roffnode)))) {
! 388: warn("malloc");
! 389: return(NULL);
! 390: }
! 391:
! 392: p->line = line;
! 393: p->tok = tokid;
! 394: p->parent = tree->last;
! 395: tree->last = p;
! 396: return(p);
! 397: }
! 398:
! 399:
! 400: static void
! 401: roffnode_free(int tokid, struct rofftree *tree)
! 402: {
! 403: struct roffnode *p;
! 404:
! 405: assert(tree->last);
! 406: assert(tree->last->tok == tokid);
! 407:
! 408: p = tree->last;
! 409: tree->last = tree->last->parent;
! 410: free(p);
! 411: }
! 412:
! 413:
! 414: static int dbg_lvl = 0; /* FIXME: de-globalise. */
! 415:
! 416:
! 417: static void
! 418: dbg_enter(const struct md_args *args, int tokid)
! 419: {
! 420: int i;
! 421:
! 422: assert(args);
! 423: if ( ! (args->dbg & MD_DBG_TREE))
! 424: return;
! 425:
! 426: assert(tokid >= 0 && tokid <= ROFF_Max);
! 427:
! 428: /* LINTED */
! 429: for (i = 0; i < dbg_lvl; i++)
! 430: (void)printf(" ");
! 431:
! 432: (void)printf("%s\n", tokens[tokid].name);
! 433:
! 434: if (ROFF_LAYOUT == tokens[tokid].type)
! 435: dbg_lvl++;
! 436: }
! 437:
! 438:
! 439: static void
! 440: dbg_leave(const struct md_args *args, int tokid)
! 441: {
! 442: int i;
! 443:
! 444: assert(args);
! 445: if ( ! (args->dbg & MD_DBG_TREE))
! 446: return;
! 447: if (ROFF_LAYOUT != tokens[tokid].type)
! 448: return;
! 449:
! 450: assert(tokid >= 0 && tokid <= ROFF_Max);
! 451: assert(dbg_lvl > 0);
! 452:
! 453: dbg_lvl--;
! 454:
! 455: /* LINTED */
! 456: for (i = 0; i < dbg_lvl; i++)
! 457: (void)printf(" ");
! 458:
! 459: (void)printf("%s\n", tokens[tokid].name);
! 460: }
! 461:
! 462:
1.1 kristaps 463: static int
464: roff_Dd(ROFFCALL_ARGS)
465: {
466:
1.3 ! kristaps 467: assert(ROFF_PRELUDE & tree->state);
! 468: if (ROFF_PRELUDE_Dt & tree->state ||
! 469: ROFF_PRELUDE_Dd & tree->state) {
! 470: warnx("%s: bad prelude ordering (line %zu)",
! 471: in->name, in->line);
1.1 kristaps 472: return(0);
473: }
474:
1.3 ! kristaps 475: assert(NULL == tree->last);
1.1 kristaps 476: tree->state |= ROFF_PRELUDE_Dd;
1.2 kristaps 477:
1.3 ! kristaps 478: dbg_enter(arg, ROFF_Dd);
1.1 kristaps 479: return(1);
480: }
481:
482:
483: static int
484: roff_Dt(ROFFCALL_ARGS)
485: {
486:
1.3 ! kristaps 487: assert(ROFF_PRELUDE & tree->state);
1.1 kristaps 488: if ( ! (ROFF_PRELUDE_Dd & tree->state) ||
489: (ROFF_PRELUDE_Dt & tree->state)) {
1.3 ! kristaps 490: warnx("%s: bad prelude ordering (line %zu)",
! 491: in->name, in->line);
! 492: return(0);
1.1 kristaps 493: }
494:
1.3 ! kristaps 495: assert(NULL == tree->last);
1.1 kristaps 496: tree->state |= ROFF_PRELUDE_Dt;
1.2 kristaps 497:
1.3 ! kristaps 498: dbg_enter(arg, ROFF_Dt);
1.1 kristaps 499: return(1);
500: }
501:
502:
503: static int
504: roff_Os(ROFFCALL_ARGS)
505: {
506:
507: if (ROFF_EXIT == type) {
1.3 ! kristaps 508: roffnode_free(ROFF_Os, tree);
1.2 kristaps 509: dbg_leave(arg, ROFF_Os);
1.1 kristaps 510: return(1);
511: }
512:
1.3 ! kristaps 513: assert(ROFF_PRELUDE & tree->state);
! 514: if ( ! (ROFF_PRELUDE_Dt & tree->state) ||
! 515: ! (ROFF_PRELUDE_Dd & tree->state)) {
! 516: warnx("%s: bad prelude ordering (line %zu)",
! 517: in->name, in->line);
1.1 kristaps 518: return(0);
519: }
520:
1.3 ! kristaps 521: assert(NULL == tree->last);
! 522: if (NULL == roffnode_new(ROFF_Os, in->line, tree))
1.1 kristaps 523: return(0);
524:
525: tree->state |= ROFF_PRELUDE_Os;
1.3 ! kristaps 526: tree->state &= ~ROFF_PRELUDE;
! 527: tree->state |= ROFF_BODY;
1.1 kristaps 528:
1.2 kristaps 529: dbg_enter(arg, ROFF_Os);
1.1 kristaps 530: return(1);
531: }
532:
533:
1.3 ! kristaps 534: static int
1.1 kristaps 535: roff_Sh(ROFFCALL_ARGS)
536: {
1.2 kristaps 537:
538: if (ROFF_EXIT == type) {
1.3 ! kristaps 539: roffnode_free(ROFF_Sh, tree);
1.2 kristaps 540: dbg_leave(arg, ROFF_Sh);
541: return(1);
542: }
543:
1.3 ! kristaps 544: if (NULL == roffnode_new(ROFF_Sh, in->line, tree))
1.2 kristaps 545: return(0);
546:
547: dbg_enter(arg, ROFF_Sh);
1.1 kristaps 548: return(1);
549: }
550:
1.2 kristaps 551:
/*
 * Text token `.Li'.  Currently a stub: it ignores all of its arguments
 * and reports success.
 */
static int
roff_Li(ROFFCALL_ARGS)
{

	/* Nothing to do yet. */
	return(1);
}
558:
559:
/*
 * Text token `.An'.  Currently a stub: it ignores all of its arguments
 * and reports success.
 */
static int
roff_An(ROFFCALL_ARGS)
{

	/* Nothing to do yet. */
	return(1);
}
CVSweb