Annotation of mandoc/roff.c, Revision 1.3
1.3 ! kristaps 1: /* $Id: roff.c,v 1.2 2008/11/24 18:32:39 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the
7: * above copyright notice and this permission notice appear in all
8: * copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11: * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12: * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13: * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14: * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15: * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16: * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17: * PERFORMANCE OF THIS SOFTWARE.
18: */
19: #include <assert.h>
20: #include <ctype.h>
21: #include <err.h>
22: #include <stdlib.h>
23: #include <stdio.h>
24: #include <string.h>
25: #include <time.h>
26:
27: #include "libmdocml.h"
28: #include "private.h"
29:
/*
 * Number of slots in the argument vectors used while parsing one macro
 * line: roffparse() sizes its argv with it and roffargs() refuses lines
 * that would fill every slot.
 */
#define	ROFF_MAXARG	  10
31:
/* Direction passed to a token callback (last member of ROFFCALL_ARGS). */
enum	roffd {
	ROFF_ENTER = 0,		/* The token was just read from the input. */
	ROFF_EXIT		/* The scope opened by the token is being closed. */
};
36:
/* FIXME: prolog roffs can be text roffs, too. */

/*
 * Class of a token.  roffparse() uses it to decide whether the token is
 * acceptable in the current parser state (prelude or body).
 */
enum	rofftype {
	ROFF_TITLE,		/* Only accepted while in the prelude. */
	ROFF_COMMENT,		/* Ignored by roffparse(). */
	ROFF_TEXT,		/* Only accepted in the body. */
	ROFF_LAYOUT		/* Only accepted in the body; may close open scopes. */
};
45:
struct	rofftree;

/*
 * Parameter list shared by all token callbacks:
 *   tok   index of the token in the tokens[] table,
 *   tree  parser state,
 *   argv  NULL-terminated words following the token (NULL on ROFF_EXIT),
 *   type  whether the token is being entered or its scope closed.
 */
#define	ROFFCALL_ARGS \
	int tok, struct rofftree *tree, \
	const char *argv[], enum roffd type
51:
52: struct rofftok {
53: char *name;
54: int (*cb)(ROFFCALL_ARGS);
55: enum rofftype type;
56: int flags;
57: #define ROFF_NESTED (1 << 0)
58: #define ROFF_PARSED (1 << 1)
59: #define ROFF_CALLABLE (1 << 2)
60: #define ROFF_QUOTES (1 << 3)
61: };
62:
/* Static description of one `-name' option; see the tokenargs[] table. */
struct	roffarg {
	char		 *name;		/* Option name without the leading dash. */
	int		  flags;	/* Bitwise OR of the flags below. */
/* The option is followed by a value word (see roffnextopt()). */
#define	ROFF_VALUE	 (1 << 0)
};
68:
/* One open scope; nodes form a stack linked through `parent'. */
struct	roffnode {
	int		  tok;		/* Token that opened the scope. */
	struct roffnode	 *parent;	/* Enclosing scope, or NULL. */
	size_t		  line;		/* Input line recorded at creation. */
};
74:
75: struct rofftree {
76: struct roffnode *last;
77: time_t date;
78: char title[256];
79: char section[256];
80: char volume[256];
81: int state;
82: #define ROFF_PRELUDE (1 << 1)
83: #define ROFF_PRELUDE_Os (1 << 2)
84: #define ROFF_PRELUDE_Dt (1 << 3)
85: #define ROFF_PRELUDE_Dd (1 << 4)
86: #define ROFF_BODY (1 << 5)
87: struct md_mbuf *mbuf; /* NULL if ROFF_EXIT and error. */
88:
89: const struct md_args *args;
90: const struct md_rbuf *rbuf;
1.2 kristaps 91: const roffin *roffin;
92: const roffblkin *roffblkin;
93: const roffout *roffout;
94: const roffblkout *roffblkout;
1.1 kristaps 95: };
96:
/* Prelude token callbacks. */
static	int		  roff_Dd(ROFFCALL_ARGS);
static	int		  roff_Dt(ROFFCALL_ARGS);
static	int		  roff_Os(ROFFCALL_ARGS);

/* Body token callbacks. */
static	int		  roff_layout(ROFFCALL_ARGS);
static	int		  roff_text(ROFFCALL_ARGS);

/* Scope stack maintenance. */
static	struct roffnode	 *roffnode_new(int tokid, size_t line,
				struct rofftree *tree);
static	void		  roffnode_free(int tokid, struct rofftree *tree);

/* Table lookups. */
static	int		  rofffindtok(const char *name);
static	int		  rofffindarg(const char *name);
static	int		  rofffindcallable(const char *name);

/* Line parsing. */
static	int		  roffargs(int tok, char *buf, char **argv);
static	int		  roffparse(struct rofftree *tree, char *buf,
				size_t sz);
static	int		  textparse(const struct rofftree *tree,
				const char *buf, size_t sz);
115:
116:
117: static const struct rofftok tokens[ROFF_MAX] = {
118: { "\\\"", NULL, ROFF_COMMENT, 0 },
119: { "Dd", roff_Dd, ROFF_TITLE, 0 },
120: { "Dt", roff_Dt, ROFF_TITLE, 0 },
121: { "Os", roff_Os, ROFF_TITLE, 0 },
1.2 kristaps 122: { "Sh", roff_layout, ROFF_LAYOUT, ROFF_PARSED },
123: { "An", roff_text, ROFF_TEXT, ROFF_PARSED },
124: { "Li", roff_text, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE },
1.1 kristaps 125: };
126:
127: static const struct roffarg tokenargs[ROFF_ARGMAX] = {
128: { "split", 0 },
129: { "nosplit", 0 },
130: };
131:
132:
133: int
134: roff_free(struct rofftree *tree, int flush)
135: {
136: int error;
137:
138: assert(tree->mbuf);
139: if ( ! flush)
140: tree->mbuf = NULL;
141:
142: /* LINTED */
143: while (tree->last)
144: if ( ! (*tokens[tree->last->tok].cb)
1.2 kristaps 145: (tree->last->tok, tree, NULL, ROFF_EXIT))
1.1 kristaps 146: /* Disallow flushing. */
147: tree->mbuf = NULL;
148:
149: error = tree->mbuf ? 0 : 1;
150:
151: if (tree->mbuf && (ROFF_PRELUDE & tree->state)) {
152: warnx("%s: prelude never finished",
153: tree->rbuf->name);
154: error = 1;
155: }
156:
157: free(tree);
158: return(error ? 0 : 1);
159: }
160:
161:
162: struct rofftree *
163: roff_alloc(const struct md_args *args, struct md_mbuf *out,
1.2 kristaps 164: const struct md_rbuf *in,
165: const roffin *roffin, const roffout *roffout,
166: const roffblkin *roffblkin,
167: const roffblkout *roffblkout)
1.1 kristaps 168: {
169: struct rofftree *tree;
170:
171: if (NULL == (tree = calloc(1, sizeof(struct rofftree)))) {
172: warn("malloc");
173: return(NULL);
174: }
175:
176: tree->state = ROFF_PRELUDE;
177: tree->args = args;
178: tree->mbuf = out;
179: tree->rbuf = in;
1.2 kristaps 180: tree->roffin = roffin;
181: tree->roffout = roffout;
182: tree->roffblkin = roffblkin;
183: tree->roffblkout = roffblkout;
1.1 kristaps 184:
185: return(tree);
186: }
187:
188:
189: int
190: roff_engine(struct rofftree *tree, char *buf, size_t sz)
191: {
192:
193: if (0 == sz) {
194: warnx("%s: blank line (line %zu)",
195: tree->rbuf->name,
196: tree->rbuf->line);
197: return(0);
198: } else if ('.' != *buf)
199: return(textparse(tree, buf, sz));
200:
201: return(roffparse(tree, buf, sz));
202: }
203:
204:
205: static int
206: textparse(const struct rofftree *tree, const char *buf, size_t sz)
207: {
208:
209: if (NULL == tree->last) {
210: warnx("%s: unexpected text (line %zu)",
211: tree->rbuf->name,
212: tree->rbuf->line);
213: return(0);
214: } else if (NULL == tree->last->parent) {
215: warnx("%s: disallowed text (line %zu)",
216: tree->rbuf->name,
217: tree->rbuf->line);
218: return(0);
219: }
220:
221: /* Print text. */
222:
223: return(1);
224: }
225:
226:
227: static int
228: roffargs(int tok, char *buf, char **argv)
229: {
230: int i;
231:
232: (void)tok;/* FIXME: quotable strings? */
233:
234: assert(tok >= 0 && tok < ROFF_MAX);
235: assert('.' == *buf);
236:
237: /* LINTED */
238: for (i = 0; *buf && i < ROFF_MAXARG; i++) {
239: argv[i] = buf++;
240: while (*buf && ! isspace(*buf))
241: buf++;
242: if (0 == *buf) {
243: continue;
244: }
245: *buf++ = 0;
246: while (*buf && isspace(*buf))
247: buf++;
248: }
249:
250: assert(i > 0);
251: if (i < ROFF_MAXARG)
252: argv[i] = NULL;
253:
254: return(ROFF_MAXARG > i);
255: }
256:
257:
258: static int
259: roffparse(struct rofftree *tree, char *buf, size_t sz)
260: {
261: int tok, t;
262: struct roffnode *node;
263: char *argv[ROFF_MAXARG];
264: const char **argvp;
265:
266: assert(sz > 0);
267:
268: /*
269: * Extract the token identifier from the buffer. If there's no
270: * callback for the token (comment, etc.) then exit immediately.
271: * We don't do any error handling (yet), so if the token doesn't
272: * exist, die.
273: */
274:
275: if (3 > sz) {
276: warnx("%s: malformed line (line %zu)",
277: tree->rbuf->name,
278: tree->rbuf->line);
279: return(0);
280: } else if (ROFF_MAX == (tok = rofffindtok(buf + 1))) {
281: warnx("%s: unknown line token `%c%c' (line %zu)",
282: tree->rbuf->name,
283: *(buf + 1), *(buf + 2),
284: tree->rbuf->line);
285: return(0);
286: } else if (ROFF_COMMENT == tokens[tok].type)
287: /* Ignore comment tokens. */
288: return(1);
289:
290: if ( ! roffargs(tok, buf, argv)) {
291: warnx("%s: too many arguments to `%s' (line %zu)",
292: tree->rbuf->name, tokens[tok].name,
293: tree->rbuf->line);
294: return(0);
295: }
296:
297: /* Domain cross-contamination (and sanity) checks. */
298:
299: switch (tokens[tok].type) {
300: case (ROFF_TITLE):
301: if (ROFF_PRELUDE & tree->state) {
302: assert( ! (ROFF_BODY & tree->state));
303: break;
304: }
305: assert(ROFF_BODY & tree->state);
306: warnx("%s: prelude token `%s' in body (line %zu)",
307: tree->rbuf->name, tokens[tok].name,
308: tree->rbuf->line);
309: return(0);
310: case (ROFF_LAYOUT):
311: /* FALLTHROUGH */
312: case (ROFF_TEXT):
313: if (ROFF_BODY & tree->state) {
314: assert( ! (ROFF_PRELUDE & tree->state));
315: break;
316: }
317: assert(ROFF_PRELUDE & tree->state);
318: warnx("%s: body token `%s' in prelude (line %zu)",
319: tree->rbuf->name, tokens[tok].name,
320: tree->rbuf->line);
321: return(0);
322: case (ROFF_COMMENT):
323: return(1);
324: default:
325: abort();
326: }
327:
328: /*
329: * If this is a non-nestable layout token and we're below a
330: * token of the same type, then recurse upward to the token,
331: * closing out the interim scopes.
332: *
333: * If there's a nested token on the chain, then raise an error
334: * as nested tokens have corresponding "ending" tokens and we're
335: * breaking their scope.
336: */
337:
338: node = NULL;
339:
340: if (ROFF_LAYOUT == tokens[tok].type &&
341: ! (ROFF_NESTED & tokens[tok].flags)) {
342: for (node = tree->last; node; node = node->parent) {
343: if (node->tok == tok)
344: break;
345:
346: /* Don't break nested scope. */
347:
348: if ( ! (ROFF_NESTED & tokens[node->tok].flags))
349: continue;
350: warnx("%s: scope of %s (line %zu) broken by "
351: "%s (line %zu)",
352: tree->rbuf->name,
353: tokens[tok].name,
354: node->line,
355: tokens[node->tok].name,
356: tree->rbuf->line);
357: return(0);
358: }
359: }
360:
361: if (node) {
362: assert(ROFF_LAYOUT == tokens[tok].type);
363: assert( ! (ROFF_NESTED & tokens[tok].flags));
364: assert(node->tok == tok);
365:
366: /* Clear up to last scoped token. */
367:
368: /* LINTED */
369: do {
370: t = tree->last->tok;
371: if ( ! (*tokens[tree->last->tok].cb)
1.2 kristaps 372: (tree->last->tok, tree, NULL, ROFF_EXIT))
1.1 kristaps 373: return(0);
374: } while (t != tok);
375: }
376:
377: /* Proceed with actual token processing. */
378:
379: argvp = (const char **)&argv[1];
1.2 kristaps 380: return((*tokens[tok].cb)(tok, tree, argvp, ROFF_ENTER));
1.1 kristaps 381: }
382:
383:
384: static int
385: rofffindarg(const char *name)
386: {
387: size_t i;
388:
389: /* FIXME: use a table, this is slow but ok for now. */
390:
391: /* LINTED */
392: for (i = 0; i < ROFF_ARGMAX; i++)
393: /* LINTED */
394: if (0 == strcmp(name, tokenargs[i].name))
395: return((int)i);
396:
397: return(ROFF_ARGMAX);
398: }
399:
400:
401: static int
402: rofffindtok(const char *name)
403: {
404: size_t i;
405:
406: /* FIXME: use a table, this is slow but ok for now. */
407:
408: /* LINTED */
409: for (i = 0; i < ROFF_MAX; i++)
410: /* LINTED */
411: if (0 == strncmp(name, tokens[i].name, 2))
412: return((int)i);
413:
414: return(ROFF_MAX);
415: }
416:
417:
1.2 kristaps 418: static int
419: rofffindcallable(const char *name)
420: {
421: int c;
422:
423: if (ROFF_MAX == (c = rofffindtok(name)))
424: return(ROFF_MAX);
425: return(ROFF_CALLABLE & tokens[c].flags ? c : ROFF_MAX);
426: }
427:
428:
1.1 kristaps 429: /* FIXME: accept only struct rofftree *. */
430: static struct roffnode *
431: roffnode_new(int tokid, size_t line, struct rofftree *tree)
432: {
433: struct roffnode *p;
434:
435: if (NULL == (p = malloc(sizeof(struct roffnode)))) {
436: warn("malloc");
437: return(NULL);
438: }
439:
440: p->line = line;
441: p->tok = tokid;
442: p->parent = tree->last;
443: tree->last = p;
444: return(p);
445: }
446:
447:
448: static void
449: roffnode_free(int tokid, struct rofftree *tree)
450: {
451: struct roffnode *p;
452:
453: assert(tree->last);
454: assert(tree->last->tok == tokid);
455:
456: p = tree->last;
457: tree->last = tree->last->parent;
458: free(p);
459: }
460:
461:
462: /* FIXME: accept only struct rofftree *. */
463: /* ARGSUSED */
464: static int
465: roff_Dd(ROFFCALL_ARGS)
466: {
467:
468: assert(ROFF_PRELUDE & tree->state);
469: if (ROFF_PRELUDE_Dt & tree->state ||
470: ROFF_PRELUDE_Dd & tree->state) {
471: warnx("%s: prelude `Dd' out-of-order (line %zu)",
472: tree->rbuf->name, tree->rbuf->line);
473: return(0);
474: }
475:
476: assert(NULL == tree->last);
477: tree->state |= ROFF_PRELUDE_Dd;
478:
479: return(1);
480: }
481:
482:
483: /* ARGSUSED */
484: static int
485: roff_Dt(ROFFCALL_ARGS)
486: {
487:
488: assert(ROFF_PRELUDE & tree->state);
489: if ( ! (ROFF_PRELUDE_Dd & tree->state) ||
490: (ROFF_PRELUDE_Dt & tree->state)) {
491: warnx("%s: prelude `Dt' out-of-order (line %zu)",
492: tree->rbuf->name, tree->rbuf->line);
493: return(0);
494: }
495:
496: assert(NULL == tree->last);
497: tree->state |= ROFF_PRELUDE_Dt;
498:
499: return(1);
500: }
501:
502:
503: /* ARGSUSED */
504: static int
505: roff_Os(ROFFCALL_ARGS)
506: {
507:
508: if (ROFF_EXIT == type) {
509: roffnode_free(ROFF_Os, tree);
510: return(1);
511: }
512:
513: assert(ROFF_PRELUDE & tree->state);
514: if ( ! (ROFF_PRELUDE_Dt & tree->state) ||
515: ! (ROFF_PRELUDE_Dd & tree->state)) {
516: warnx("%s: prelude `Os' out-of-order (line %zu)",
517: tree->rbuf->name, tree->rbuf->line);
518: return(0);
519: }
520:
521: assert(NULL == tree->last);
522: if (NULL == roffnode_new(ROFF_Os, tree->rbuf->line, tree))
523: return(0);
524:
525: tree->state |= ROFF_PRELUDE_Os;
526: tree->state &= ~ROFF_PRELUDE;
527: tree->state |= ROFF_BODY;
528:
529: return(1);
530: }
531:
532:
1.2 kristaps 533: /* ARGUSED */
1.1 kristaps 534: static int
535: roffnextopt(const char ***in, char **val)
536: {
537: const char *arg, **argv;
538: int v;
539:
540: *val = NULL;
541: argv = *in;
542: assert(argv);
543:
544: if (NULL == (arg = *argv))
545: return(-1);
546: if ('-' != *arg)
547: return(-1);
548: if (ROFF_ARGMAX == (v = rofffindarg(&arg[1])))
549: return(-1);
550: if ( ! (ROFF_VALUE & tokenargs[v].flags))
551: return(v);
552:
553: *in = ++argv;
554:
555: /* FIXME: what if this looks like a roff token or argument? */
556:
557: return(*argv ? v : ROFF_ARGMAX);
558: }
559:
560:
561: /* ARGSUSED */
562: static int
1.2 kristaps 563: roff_layout(ROFFCALL_ARGS)
1.1 kristaps 564: {
1.2 kristaps 565: int i, c, argcp[ROFF_MAXARG];
566: char *v, *argvp[ROFF_MAXARG];
1.1 kristaps 567:
1.2 kristaps 568: if (ROFF_EXIT == type) {
569: roffnode_free(tok, tree);
570: return((*tree->roffblkout[tok])(tok));
571: }
1.1 kristaps 572:
1.2 kristaps 573: i = 0;
574: while (-1 != (c = roffnextopt(&argv, &v))) {
575: if (ROFF_ARGMAX == c) {
576: warnx("%s: error parsing `%s' args (line %zu)",
1.1 kristaps 577: tree->rbuf->name,
1.2 kristaps 578: tokens[tok].name,
1.1 kristaps 579: tree->rbuf->line);
580: return(0);
581: }
1.2 kristaps 582: argcp[i] = c;
583: argvp[i] = v;
1.1 kristaps 584: argv++;
585: }
586:
1.2 kristaps 587: if (NULL == roffnode_new(tok, tree->rbuf->line, tree))
588: return(0);
589:
590: if ( ! (*tree->roffin[tok])(tok, argcp, argvp))
591: return(0);
592:
593: if ( ! (ROFF_PARSED & tokens[tok].flags)) {
594: /* TODO: print all tokens. */
595:
596: if ( ! ((*tree->roffout[tok])(tok)))
597: return(0);
598: return((*tree->roffblkin[tok])(tok));
599: }
600:
1.1 kristaps 601: while (*argv) {
1.2 kristaps 602: if (2 >= strlen(*argv) && ROFF_MAX !=
603: (c = rofffindcallable(*argv)))
604: if ( ! (*tokens[c].cb)(c, tree,
605: argv + 1, ROFF_ENTER))
606: return(0);
607:
608: /* TODO: print token. */
609: argv++;
610: }
611:
612: if ( ! ((*tree->roffout[tok])(tok)))
613: return(0);
614:
615: return((*tree->roffblkin[tok])(tok));
616: }
617:
618:
619: /* ARGSUSED */
620: static int
621: roff_text(ROFFCALL_ARGS)
622: {
623: int i, c, argcp[ROFF_MAXARG];
624: char *v, *argvp[ROFF_MAXARG];
625:
626: i = 0;
627: while (-1 != (c = roffnextopt(&argv, &v))) {
628: if (ROFF_ARGMAX == c) {
629: warnx("%s: error parsing `%s' args (line %zu)",
630: tree->rbuf->name,
631: tokens[tok].name,
632: tree->rbuf->line);
633: return(0);
1.1 kristaps 634: }
1.2 kristaps 635: argcp[i] = c;
636: argvp[i] = v;
1.1 kristaps 637: argv++;
638: }
639:
1.2 kristaps 640: if ( ! (*tree->roffin[tok])(tok, argcp, argvp))
641: return(0);
1.1 kristaps 642:
1.2 kristaps 643: if ( ! (ROFF_PARSED & tokens[tok].flags)) {
644: /* TODO: print all tokens. */
645: return((*tree->roffout[tok])(tok));
646: }
1.1 kristaps 647:
1.2 kristaps 648: while (*argv) {
649: if (2 >= strlen(*argv) && ROFF_MAX !=
650: (c = rofffindcallable(*argv)))
651: if ( ! (*tokens[c].cb)(c, tree,
652: argv + 1, ROFF_ENTER))
653: return(0);
654:
655: /* TODO: print token. */
656: argv++;
657: }
658:
659: return((*tree->roffout[tok])(tok));
1.1 kristaps 660: }
CVSweb