Annotation of mandoc/roff.c, Revision 1.2
1.2 ! kristaps 1: /* $Id: roff.c,v 1.1 2008/11/24 14:24:55 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the
7: * above copyright notice and this permission notice appear in all
8: * copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11: * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12: * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13: * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14: * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15: * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16: * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17: * PERFORMANCE OF THIS SOFTWARE.
18: */
19: #include <assert.h>
20: #include <ctype.h>
21: #include <err.h>
22: #include <stdlib.h>
23: #include <stdio.h>
24: #include <string.h>
25: #include <time.h>
26:
27: #include "libmdocml.h"
28: #include "private.h"
29:
/*
 * Capacity of the per-line word vectors: roffargs() splits a macro line
 * into at most this many entries, and the option arrays built by the
 * token callbacks are sized by it as well.
 */
#define	ROFF_MAXARG	  10
31:
/*
 * Direction in which a token callback is invoked: ROFF_ENTER when the
 * token is encountered on a line, ROFF_EXIT when its scope is closed
 * (see roffparse() and roff_free()).
 */
enum	roffd {
	ROFF_ENTER = 0,
	ROFF_EXIT = 1
};
36:
/*
 * Classification of a token, used by roffparse() for its domain checks:
 * ROFF_TITLE tokens are only accepted in the prelude, ROFF_LAYOUT and
 * ROFF_TEXT tokens only in the body, and ROFF_COMMENT lines are ignored.
 */
enum	rofftype {
	ROFF_TITLE = 0,
	ROFF_COMMENT = 1,
	ROFF_TEXT = 2,
	ROFF_LAYOUT = 3
};
43:
struct	rofftree;

/*
 * Parameter list shared by every token callback: the token identifier,
 * the parse tree, the remaining words of the line (NULL when the
 * callback is invoked with ROFF_EXIT) and the call direction.
 */
#define	ROFFCALL_ARGS \
	int tok, struct rofftree *tree, \
	const char *argv[], enum roffd type
49:
50: struct rofftok {
51: char *name;
52: int (*cb)(ROFFCALL_ARGS);
53: enum rofftype type;
54: int flags;
55: #define ROFF_NESTED (1 << 0)
56: #define ROFF_PARSED (1 << 1)
57: #define ROFF_CALLABLE (1 << 2)
58: #define ROFF_QUOTES (1 << 3)
59: };
60:
/* Bit for roffarg.flags: the option is followed by a value word. */
#define	ROFF_VALUE	 (1 << 0)

/*
 * Static description of one `-name' option accepted on a macro line
 * (looked up by rofffindarg()).
 */
struct	roffarg {
	char		 *name;
	int		  flags;
};
66:
/*
 * One open scope.  Nodes form a stack linked through `parent'; the top
 * of the stack is rofftree.last.
 */
struct	roffnode {
	int		  tok;		/* token that opened the scope */
	struct roffnode	 *parent;	/* enclosing scope or NULL */
	size_t		  line;		/* input line of the opening token */
};
72:
73: struct rofftree {
74: struct roffnode *last;
75: time_t date;
76: char title[256];
77: char section[256];
78: char volume[256];
79: int state;
80: #define ROFF_PRELUDE (1 << 1)
81: #define ROFF_PRELUDE_Os (1 << 2)
82: #define ROFF_PRELUDE_Dt (1 << 3)
83: #define ROFF_PRELUDE_Dd (1 << 4)
84: #define ROFF_BODY (1 << 5)
85: struct md_mbuf *mbuf; /* NULL if ROFF_EXIT and error. */
86:
87: const struct md_args *args;
88: const struct md_rbuf *rbuf;
1.2 ! kristaps 89: const roffin *roffin;
! 90: const roffblkin *roffblkin;
! 91: const roffout *roffout;
! 92: const roffblkout *roffblkout;
1.1 kristaps 93: };
94:
/* Token callbacks for the prelude. */
static	int		  roff_Dd(ROFFCALL_ARGS);
static	int		  roff_Dt(ROFFCALL_ARGS);
static	int		  roff_Os(ROFFCALL_ARGS);

/* Generic token callbacks for the body. */
static	int		  roff_layout(ROFFCALL_ARGS);
static	int		  roff_text(ROFFCALL_ARGS);

/* Scope stack maintenance. */
static	struct roffnode	 *roffnode_new(int tokid, size_t line,
				struct rofftree *tree);
static	void		  roffnode_free(int tokid, struct rofftree *tree);

/* Table lookups. */
static	int		  rofffindtok(const char *name);
static	int		  rofffindarg(const char *name);
static	int		  rofffindcallable(const char *name);

/* Line parsing. */
static	int		  roffargs(int tok, char *buf, char **argv);
static	int		  roffparse(struct rofftree *tree, char *buf,
				size_t sz);
static	int		  textparse(const struct rofftree *tree,
				const char *buf, size_t sz);
113:
114:
115: static const struct rofftok tokens[ROFF_MAX] = {
116: { "\\\"", NULL, ROFF_COMMENT, 0 },
117: { "Dd", roff_Dd, ROFF_TITLE, 0 },
118: { "Dt", roff_Dt, ROFF_TITLE, 0 },
119: { "Os", roff_Os, ROFF_TITLE, 0 },
1.2 ! kristaps 120: { "Sh", roff_layout, ROFF_LAYOUT, ROFF_PARSED },
! 121: { "An", roff_text, ROFF_TEXT, ROFF_PARSED },
! 122: { "Li", roff_text, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE },
1.1 kristaps 123: };
124:
125: static const struct roffarg tokenargs[ROFF_ARGMAX] = {
126: { "split", 0 },
127: { "nosplit", 0 },
128: };
129:
130:
131: int
132: roff_free(struct rofftree *tree, int flush)
133: {
134: int error;
135:
136: assert(tree->mbuf);
137: if ( ! flush)
138: tree->mbuf = NULL;
139:
140: /* LINTED */
141: while (tree->last)
142: if ( ! (*tokens[tree->last->tok].cb)
1.2 ! kristaps 143: (tree->last->tok, tree, NULL, ROFF_EXIT))
1.1 kristaps 144: /* Disallow flushing. */
145: tree->mbuf = NULL;
146:
147: error = tree->mbuf ? 0 : 1;
148:
149: if (tree->mbuf && (ROFF_PRELUDE & tree->state)) {
150: warnx("%s: prelude never finished",
151: tree->rbuf->name);
152: error = 1;
153: }
154:
155: free(tree);
156: return(error ? 0 : 1);
157: }
158:
159:
160: struct rofftree *
161: roff_alloc(const struct md_args *args, struct md_mbuf *out,
1.2 ! kristaps 162: const struct md_rbuf *in,
! 163: const roffin *roffin, const roffout *roffout,
! 164: const roffblkin *roffblkin,
! 165: const roffblkout *roffblkout)
1.1 kristaps 166: {
167: struct rofftree *tree;
168:
169: if (NULL == (tree = calloc(1, sizeof(struct rofftree)))) {
170: warn("malloc");
171: return(NULL);
172: }
173:
174: tree->state = ROFF_PRELUDE;
175: tree->args = args;
176: tree->mbuf = out;
177: tree->rbuf = in;
1.2 ! kristaps 178: tree->roffin = roffin;
! 179: tree->roffout = roffout;
! 180: tree->roffblkin = roffblkin;
! 181: tree->roffblkout = roffblkout;
1.1 kristaps 182:
183: return(tree);
184: }
185:
186:
187: int
188: roff_engine(struct rofftree *tree, char *buf, size_t sz)
189: {
190:
191: if (0 == sz) {
192: warnx("%s: blank line (line %zu)",
193: tree->rbuf->name,
194: tree->rbuf->line);
195: return(0);
196: } else if ('.' != *buf)
197: return(textparse(tree, buf, sz));
198:
199: return(roffparse(tree, buf, sz));
200: }
201:
202:
203: static int
204: textparse(const struct rofftree *tree, const char *buf, size_t sz)
205: {
206:
207: if (NULL == tree->last) {
208: warnx("%s: unexpected text (line %zu)",
209: tree->rbuf->name,
210: tree->rbuf->line);
211: return(0);
212: } else if (NULL == tree->last->parent) {
213: warnx("%s: disallowed text (line %zu)",
214: tree->rbuf->name,
215: tree->rbuf->line);
216: return(0);
217: }
218:
219: /* Print text. */
220:
221: return(1);
222: }
223:
224:
225: static int
226: roffargs(int tok, char *buf, char **argv)
227: {
228: int i;
229:
230: (void)tok;/* FIXME: quotable strings? */
231:
232: assert(tok >= 0 && tok < ROFF_MAX);
233: assert('.' == *buf);
234:
235: /* LINTED */
236: for (i = 0; *buf && i < ROFF_MAXARG; i++) {
237: argv[i] = buf++;
238: while (*buf && ! isspace(*buf))
239: buf++;
240: if (0 == *buf) {
241: continue;
242: }
243: *buf++ = 0;
244: while (*buf && isspace(*buf))
245: buf++;
246: }
247:
248: assert(i > 0);
249: if (i < ROFF_MAXARG)
250: argv[i] = NULL;
251:
252: return(ROFF_MAXARG > i);
253: }
254:
255:
256: static int
257: roffparse(struct rofftree *tree, char *buf, size_t sz)
258: {
259: int tok, t;
260: struct roffnode *node;
261: char *argv[ROFF_MAXARG];
262: const char **argvp;
263:
264: assert(sz > 0);
265:
266: /*
267: * Extract the token identifier from the buffer. If there's no
268: * callback for the token (comment, etc.) then exit immediately.
269: * We don't do any error handling (yet), so if the token doesn't
270: * exist, die.
271: */
272:
273: if (3 > sz) {
274: warnx("%s: malformed line (line %zu)",
275: tree->rbuf->name,
276: tree->rbuf->line);
277: return(0);
278: } else if (ROFF_MAX == (tok = rofffindtok(buf + 1))) {
279: warnx("%s: unknown line token `%c%c' (line %zu)",
280: tree->rbuf->name,
281: *(buf + 1), *(buf + 2),
282: tree->rbuf->line);
283: return(0);
284: } else if (ROFF_COMMENT == tokens[tok].type)
285: /* Ignore comment tokens. */
286: return(1);
287:
288: if ( ! roffargs(tok, buf, argv)) {
289: warnx("%s: too many arguments to `%s' (line %zu)",
290: tree->rbuf->name, tokens[tok].name,
291: tree->rbuf->line);
292: return(0);
293: }
294:
295: /* Domain cross-contamination (and sanity) checks. */
296:
297: switch (tokens[tok].type) {
298: case (ROFF_TITLE):
299: if (ROFF_PRELUDE & tree->state) {
300: assert( ! (ROFF_BODY & tree->state));
301: break;
302: }
303: assert(ROFF_BODY & tree->state);
304: warnx("%s: prelude token `%s' in body (line %zu)",
305: tree->rbuf->name, tokens[tok].name,
306: tree->rbuf->line);
307: return(0);
308: case (ROFF_LAYOUT):
309: /* FALLTHROUGH */
310: case (ROFF_TEXT):
311: if (ROFF_BODY & tree->state) {
312: assert( ! (ROFF_PRELUDE & tree->state));
313: break;
314: }
315: assert(ROFF_PRELUDE & tree->state);
316: warnx("%s: body token `%s' in prelude (line %zu)",
317: tree->rbuf->name, tokens[tok].name,
318: tree->rbuf->line);
319: return(0);
320: case (ROFF_COMMENT):
321: return(1);
322: default:
323: abort();
324: }
325:
326: /*
327: * If this is a non-nestable layout token and we're below a
328: * token of the same type, then recurse upward to the token,
329: * closing out the interim scopes.
330: *
331: * If there's a nested token on the chain, then raise an error
332: * as nested tokens have corresponding "ending" tokens and we're
333: * breaking their scope.
334: */
335:
336: node = NULL;
337:
338: if (ROFF_LAYOUT == tokens[tok].type &&
339: ! (ROFF_NESTED & tokens[tok].flags)) {
340: for (node = tree->last; node; node = node->parent) {
341: if (node->tok == tok)
342: break;
343:
344: /* Don't break nested scope. */
345:
346: if ( ! (ROFF_NESTED & tokens[node->tok].flags))
347: continue;
348: warnx("%s: scope of %s (line %zu) broken by "
349: "%s (line %zu)",
350: tree->rbuf->name,
351: tokens[tok].name,
352: node->line,
353: tokens[node->tok].name,
354: tree->rbuf->line);
355: return(0);
356: }
357: }
358:
359: if (node) {
360: assert(ROFF_LAYOUT == tokens[tok].type);
361: assert( ! (ROFF_NESTED & tokens[tok].flags));
362: assert(node->tok == tok);
363:
364: /* Clear up to last scoped token. */
365:
366: /* LINTED */
367: do {
368: t = tree->last->tok;
369: if ( ! (*tokens[tree->last->tok].cb)
1.2 ! kristaps 370: (tree->last->tok, tree, NULL, ROFF_EXIT))
1.1 kristaps 371: return(0);
372: } while (t != tok);
373: }
374:
375: /* Proceed with actual token processing. */
376:
377: argvp = (const char **)&argv[1];
1.2 ! kristaps 378: return((*tokens[tok].cb)(tok, tree, argvp, ROFF_ENTER));
1.1 kristaps 379: }
380:
381:
382: static int
383: rofffindarg(const char *name)
384: {
385: size_t i;
386:
387: /* FIXME: use a table, this is slow but ok for now. */
388:
389: /* LINTED */
390: for (i = 0; i < ROFF_ARGMAX; i++)
391: /* LINTED */
392: if (0 == strcmp(name, tokenargs[i].name))
393: return((int)i);
394:
395: return(ROFF_ARGMAX);
396: }
397:
398:
399: static int
400: rofffindtok(const char *name)
401: {
402: size_t i;
403:
404: /* FIXME: use a table, this is slow but ok for now. */
405:
406: /* LINTED */
407: for (i = 0; i < ROFF_MAX; i++)
408: /* LINTED */
409: if (0 == strncmp(name, tokens[i].name, 2))
410: return((int)i);
411:
412: return(ROFF_MAX);
413: }
414:
415:
1.2 ! kristaps 416: static int
! 417: rofffindcallable(const char *name)
! 418: {
! 419: int c;
! 420:
! 421: if (ROFF_MAX == (c = rofffindtok(name)))
! 422: return(ROFF_MAX);
! 423: return(ROFF_CALLABLE & tokens[c].flags ? c : ROFF_MAX);
! 424: }
! 425:
! 426:
1.1 kristaps 427: /* FIXME: accept only struct rofftree *. */
428: static struct roffnode *
429: roffnode_new(int tokid, size_t line, struct rofftree *tree)
430: {
431: struct roffnode *p;
432:
433: if (NULL == (p = malloc(sizeof(struct roffnode)))) {
434: warn("malloc");
435: return(NULL);
436: }
437:
438: p->line = line;
439: p->tok = tokid;
440: p->parent = tree->last;
441: tree->last = p;
442: return(p);
443: }
444:
445:
446: static void
447: roffnode_free(int tokid, struct rofftree *tree)
448: {
449: struct roffnode *p;
450:
451: assert(tree->last);
452: assert(tree->last->tok == tokid);
453:
454: p = tree->last;
455: tree->last = tree->last->parent;
456: free(p);
457: }
458:
459:
460: /* FIXME: accept only struct rofftree *. */
461: /* ARGSUSED */
462: static int
463: roff_Dd(ROFFCALL_ARGS)
464: {
465:
466: assert(ROFF_PRELUDE & tree->state);
467: if (ROFF_PRELUDE_Dt & tree->state ||
468: ROFF_PRELUDE_Dd & tree->state) {
469: warnx("%s: prelude `Dd' out-of-order (line %zu)",
470: tree->rbuf->name, tree->rbuf->line);
471: return(0);
472: }
473:
474: assert(NULL == tree->last);
475: tree->state |= ROFF_PRELUDE_Dd;
476:
477: return(1);
478: }
479:
480:
481: /* ARGSUSED */
482: static int
483: roff_Dt(ROFFCALL_ARGS)
484: {
485:
486: assert(ROFF_PRELUDE & tree->state);
487: if ( ! (ROFF_PRELUDE_Dd & tree->state) ||
488: (ROFF_PRELUDE_Dt & tree->state)) {
489: warnx("%s: prelude `Dt' out-of-order (line %zu)",
490: tree->rbuf->name, tree->rbuf->line);
491: return(0);
492: }
493:
494: assert(NULL == tree->last);
495: tree->state |= ROFF_PRELUDE_Dt;
496:
497: return(1);
498: }
499:
500:
501: /* ARGSUSED */
502: static int
503: roff_Os(ROFFCALL_ARGS)
504: {
505:
506: if (ROFF_EXIT == type) {
507: roffnode_free(ROFF_Os, tree);
508: return(1);
509: }
510:
511: assert(ROFF_PRELUDE & tree->state);
512: if ( ! (ROFF_PRELUDE_Dt & tree->state) ||
513: ! (ROFF_PRELUDE_Dd & tree->state)) {
514: warnx("%s: prelude `Os' out-of-order (line %zu)",
515: tree->rbuf->name, tree->rbuf->line);
516: return(0);
517: }
518:
519: assert(NULL == tree->last);
520: if (NULL == roffnode_new(ROFF_Os, tree->rbuf->line, tree))
521: return(0);
522:
523: tree->state |= ROFF_PRELUDE_Os;
524: tree->state &= ~ROFF_PRELUDE;
525: tree->state |= ROFF_BODY;
526:
527: return(1);
528: }
529:
530:
1.2 ! kristaps 531: /* ARGUSED */
1.1 kristaps 532: static int
533: roffnextopt(const char ***in, char **val)
534: {
535: const char *arg, **argv;
536: int v;
537:
538: *val = NULL;
539: argv = *in;
540: assert(argv);
541:
542: if (NULL == (arg = *argv))
543: return(-1);
544: if ('-' != *arg)
545: return(-1);
546: if (ROFF_ARGMAX == (v = rofffindarg(&arg[1])))
547: return(-1);
548: if ( ! (ROFF_VALUE & tokenargs[v].flags))
549: return(v);
550:
551: *in = ++argv;
552:
553: /* FIXME: what if this looks like a roff token or argument? */
554:
555: return(*argv ? v : ROFF_ARGMAX);
556: }
557:
558:
559: /* ARGSUSED */
560: static int
1.2 ! kristaps 561: roff_layout(ROFFCALL_ARGS)
1.1 kristaps 562: {
1.2 ! kristaps 563: int i, c, argcp[ROFF_MAXARG];
! 564: char *v, *argvp[ROFF_MAXARG];
1.1 kristaps 565:
1.2 ! kristaps 566: if (ROFF_EXIT == type) {
! 567: roffnode_free(tok, tree);
! 568: return((*tree->roffblkout[tok])(tok));
! 569: }
1.1 kristaps 570:
1.2 ! kristaps 571: i = 0;
! 572: while (-1 != (c = roffnextopt(&argv, &v))) {
! 573: if (ROFF_ARGMAX == c) {
! 574: warnx("%s: error parsing `%s' args (line %zu)",
1.1 kristaps 575: tree->rbuf->name,
1.2 ! kristaps 576: tokens[tok].name,
1.1 kristaps 577: tree->rbuf->line);
578: return(0);
579: }
1.2 ! kristaps 580: argcp[i] = c;
! 581: argvp[i] = v;
1.1 kristaps 582: argv++;
583: }
584:
1.2 ! kristaps 585: if (NULL == roffnode_new(tok, tree->rbuf->line, tree))
! 586: return(0);
! 587:
! 588: if ( ! (*tree->roffin[tok])(tok, argcp, argvp))
! 589: return(0);
! 590:
! 591: if ( ! (ROFF_PARSED & tokens[tok].flags)) {
! 592: /* TODO: print all tokens. */
! 593:
! 594: if ( ! ((*tree->roffout[tok])(tok)))
! 595: return(0);
! 596: return((*tree->roffblkin[tok])(tok));
! 597: }
! 598:
1.1 kristaps 599: while (*argv) {
1.2 ! kristaps 600: if (2 >= strlen(*argv) && ROFF_MAX !=
! 601: (c = rofffindcallable(*argv)))
! 602: if ( ! (*tokens[c].cb)(c, tree,
! 603: argv + 1, ROFF_ENTER))
! 604: return(0);
! 605:
! 606: /* TODO: print token. */
! 607: argv++;
! 608: }
! 609:
! 610: if ( ! ((*tree->roffout[tok])(tok)))
! 611: return(0);
! 612:
! 613: return((*tree->roffblkin[tok])(tok));
! 614: }
! 615:
! 616:
! 617: /* ARGSUSED */
! 618: static int
! 619: roff_text(ROFFCALL_ARGS)
! 620: {
! 621: int i, c, argcp[ROFF_MAXARG];
! 622: char *v, *argvp[ROFF_MAXARG];
! 623:
! 624: i = 0;
! 625: while (-1 != (c = roffnextopt(&argv, &v))) {
! 626: if (ROFF_ARGMAX == c) {
! 627: warnx("%s: error parsing `%s' args (line %zu)",
! 628: tree->rbuf->name,
! 629: tokens[tok].name,
! 630: tree->rbuf->line);
! 631: return(0);
1.1 kristaps 632: }
1.2 ! kristaps 633: argcp[i] = c;
! 634: argvp[i] = v;
1.1 kristaps 635: argv++;
636: }
637:
1.2 ! kristaps 638: if ( ! (*tree->roffin[tok])(tok, argcp, argvp))
! 639: return(0);
1.1 kristaps 640:
1.2 ! kristaps 641: if ( ! (ROFF_PARSED & tokens[tok].flags)) {
! 642: /* TODO: print all tokens. */
! 643: return((*tree->roffout[tok])(tok));
! 644: }
1.1 kristaps 645:
1.2 ! kristaps 646: while (*argv) {
! 647: if (2 >= strlen(*argv) && ROFF_MAX !=
! 648: (c = rofffindcallable(*argv)))
! 649: if ( ! (*tokens[c].cb)(c, tree,
! 650: argv + 1, ROFF_ENTER))
! 651: return(0);
! 652:
! 653: /* TODO: print token. */
! 654: argv++;
! 655: }
! 656:
! 657: return((*tree->roffout[tok])(tok));
1.1 kristaps 658: }
CVSweb