Annotation of mandoc/roff.c, Revision 1.81
1.81 ! kristaps 1: /* $Id: roff.c,v 1.80 2010/05/17 00:37:26 kristaps Exp $ */
1.1 kristaps 2: /*
1.67 kristaps 3: * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
1.1 kristaps 4: *
5: * Permission to use, copy, modify, and distribute this software for any
1.66 kristaps 6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
1.1 kristaps 8: *
1.66 kristaps 9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.1 kristaps 16: */
1.66 kristaps 17: #ifdef HAVE_CONFIG_H
18: #include "config.h"
19: #endif
1.30 kristaps 20:
1.67 kristaps 21: #include <assert.h>
1.1 kristaps 22: #include <stdlib.h>
1.67 kristaps 23: #include <string.h>
1.75 kristaps 24: #include <stdio.h>
1.1 kristaps 25:
1.67 kristaps 26: #include "mandoc.h"
1.43 kristaps 27: #include "roff.h"
1.33 kristaps 28:
/*
 * Non-zero when c is one of the two characters (`.' or `'') that this
 * parser accepts as the start of a macro line.  The argument is
 * evaluated twice, so it must be free of side effects.
 */
#define	ROFF_CTL(c) \
	((c) == '.' || (c) == '\'')
31:
/*
 * Token identifiers.  The values index the roffs[] table, so the order
 * here must match the order of that table's initialisers.  ROFF_MAX is
 * the number of tokens and is also what the lookup routines return
 * when a name is not recognised.
 */
enum	rofft {
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_if,
	ROFF_ig,
	ROFF_cblock,	/* the `.' entry, i.e. a `..' line */
	ROFF_ccond,	/* the `\}' entry */
#if 0
	ROFF_ie,
	ROFF_el,
#endif
	ROFF_MAX
};
49:
50: struct roff {
51: struct roffnode *last; /* leaf of stack */
52: mandocmsg msg; /* err/warn/fatal messages */
53: void *data; /* privdata for messages */
54: };
55:
/*
 * Per-scope rule consulted by roff_if_sub(): under ROFFRULE_DENY
 * unrecognised lines are ignored, otherwise they are passed on.
 */
enum	roffrule {
	ROFFRULE_ALLOW,
	ROFFRULE_DENY
};
60:
1.67 kristaps 61: struct roffnode {
62: enum rofft tok; /* type of node */
63: struct roffnode *parent; /* up one in stack */
64: int line; /* parse line */
65: int col; /* parse col */
1.79 kristaps 66: char *end; /* end-rules: custom token */
67: int endspan; /* end-rules: next-line or infty */
68: enum roffrule rule;
1.67 kristaps 69: };
70:
/*
 * Common parameter list of every handler stored in struct roffmac.
 * ppos is the position the line was entered at, pos the position
 * after the macro name has been consumed.
 */
#define	ROFF_ARGS	 struct roff *r,	/* parse context */ \
			 enum rofft tok,	/* token of the macro */ \
			 char **bufp,		/* input buffer */ \
			 size_t *szp,		/* size of input buffer */ \
			 int ln,		/* parse line */ \
			 int ppos,		/* original pos in buffer */ \
			 int pos,		/* current pos in buffer */ \
			 int *offs		/* reset offset of buffer data */

typedef	enum rofferr (*roffproc)(ROFF_ARGS);
81:
82: struct roffmac {
83: const char *name; /* macro name */
1.79 kristaps 84: roffproc proc; /* process new macro */
85: roffproc text; /* process as child text of macro */
86: roffproc sub; /* process as child of macro */
87: int flags;
88: #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
1.67 kristaps 89: };
90:
/* Handlers referenced from the roffs[] table below. */
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);
static	enum rofferr	 roff_if(ROFF_ARGS);
static	enum rofferr	 roff_if_sub(ROFF_ARGS);
static	enum rofferr	 roff_if_text(ROFF_ARGS);
1.67 kristaps 99:
100: const struct roffmac roffs[ROFF_MAX] = {
1.80 kristaps 101: { "am", roff_block, roff_block_text, roff_block_sub, 0 },
102: { "ami", roff_block, roff_block_text, roff_block_sub, 0 },
103: { "am1", roff_block, roff_block_text, roff_block_sub, 0 },
104: { "de", roff_block, roff_block_text, roff_block_sub, 0 },
105: { "dei", roff_block, roff_block_text, roff_block_sub, 0 },
106: { "de1", roff_block, roff_block_text, roff_block_sub, 0 },
1.79 kristaps 107: { "if", roff_if, roff_if_text, roff_if_sub, ROFFMAC_STRUCT },
1.80 kristaps 108: { "ig", roff_block, roff_block_text, roff_block_sub, 0 },
1.79 kristaps 109: { ".", roff_cblock, NULL, NULL, 0 },
110: { "\\}", roff_ccond, NULL, NULL, 0 },
1.67 kristaps 111: };
112:
/* File-local helpers: token lookup and scope-stack maintenance. */
static	enum rofft	 roff_hash_find(const char *);
static	enum rofft	 roff_parse(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	int		 roffnode_push(struct roff *,
				enum rofft, int, int);
120:
121:
122: /*
123: * Look up a roff token by its name. Returns ROFF_MAX if no macro by
124: * the nil-terminated string name could be found.
125: */
126: static enum rofft
127: roff_hash_find(const char *p)
128: {
129: int i;
130:
131: /* FIXME: make this be fast and efficient. */
132:
133: for (i = 0; i < (int)ROFF_MAX; i++)
134: if (0 == strcmp(roffs[i].name, p))
135: return((enum rofft)i);
136:
137: return(ROFF_MAX);
138: }
139:
140:
141: /*
142: * Pop the current node off of the stack of roff instructions currently
143: * pending.
144: */
145: static void
146: roffnode_pop(struct roff *r)
147: {
148: struct roffnode *p;
149:
1.75 kristaps 150: assert(r->last);
151: p = r->last;
152: r->last = r->last->parent;
1.74 kristaps 153: if (p->end)
154: free(p->end);
1.67 kristaps 155: free(p);
156: }
157:
158:
159: /*
160: * Push a roff node onto the instruction stack. This must later be
161: * removed with roffnode_pop().
162: */
163: static int
164: roffnode_push(struct roff *r, enum rofft tok, int line, int col)
165: {
166: struct roffnode *p;
167:
168: if (NULL == (p = calloc(1, sizeof(struct roffnode)))) {
169: (*r->msg)(MANDOCERR_MEM, r->data, line, col, NULL);
170: return(0);
171: }
172:
173: p->tok = tok;
174: p->parent = r->last;
175: p->line = line;
176: p->col = col;
1.79 kristaps 177: p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
1.67 kristaps 178:
179: r->last = p;
180: return(1);
181: }
182:
183:
184: static void
185: roff_free1(struct roff *r)
186: {
187:
188: while (r->last)
189: roffnode_pop(r);
190: }
191:
192:
/*
 * Drop all open scopes so the context can be reused; the message
 * callback and its private data are left untouched.
 */
void
roff_reset(struct roff *r)
{

	roff_free1(r);
}
199:
200:
/*
 * Release a context obtained from roff_alloc(), including any scopes
 * that are still open.  A NULL pointer is accepted and ignored, in the
 * manner of free(), so callers may clean up unconditionally even after
 * a failed roff_alloc().
 */
void
roff_free(struct roff *r)
{

	if (NULL == r)
		return;

	roff_free1(r);
	free(r);
}
208:
209:
210: struct roff *
211: roff_alloc(const mandocmsg msg, void *data)
212: {
213: struct roff *r;
214:
215: if (NULL == (r = calloc(1, sizeof(struct roff)))) {
216: (*msg)(MANDOCERR_MEM, data, 0, 0, NULL);
217: return(0);
218: }
219:
220: r->msg = msg;
221: r->data = data;
222: return(r);
223: }
224:
225:
226: enum rofferr
1.74 kristaps 227: roff_parseln(struct roff *r, int ln,
228: char **bufp, size_t *szp, int pos, int *offs)
1.67 kristaps 229: {
230: enum rofft t;
1.79 kristaps 231: int ppos;
232:
233: /*
234: * First, if a scope is open and we're not a macro, pass the
235: * text through the macro's filter. If a scope isn't open and
236: * we're not a macro, just let it through.
237: */
1.74 kristaps 238:
1.75 kristaps 239: if (r->last && ! ROFF_CTL((*bufp)[pos])) {
1.78 kristaps 240: t = r->last->tok;
241: assert(roffs[t].text);
242: return((*roffs[t].text)
243: (r, t, bufp, szp, ln, pos, pos, offs));
1.75 kristaps 244: } else if ( ! ROFF_CTL((*bufp)[pos]))
1.67 kristaps 245: return(ROFF_CONT);
246:
1.79 kristaps 247: /*
248: * If a scope is open, go to the child handler for that macro,
249: * as it may want to preprocess before doing anything with it.
250: */
1.78 kristaps 251:
1.79 kristaps 252: if (r->last) {
253: t = r->last->tok;
254: assert(roffs[t].sub);
255: return((*roffs[t].sub)
256: (r, t, bufp, szp, ln, pos, pos, offs));
257: }
1.78 kristaps 258:
1.79 kristaps 259: /*
260: * Lastly, as we've no scope open, try to look up and execute
261: * the new macro. If no macro is found, simply return and let
262: * the compilers handle it.
263: */
1.67 kristaps 264:
1.75 kristaps 265: ppos = pos;
1.79 kristaps 266: if (ROFF_MAX == (t = roff_parse(*bufp, &pos)))
267: return(ROFF_CONT);
1.67 kristaps 268:
1.75 kristaps 269: assert(roffs[t].proc);
1.78 kristaps 270: return((*roffs[t].proc)
271: (r, t, bufp, szp, ln, ppos, pos, offs));
1.74 kristaps 272: }
273:
274:
275: int
276: roff_endparse(struct roff *r)
277: {
278:
279: if (NULL == r->last)
280: return(1);
281: return((*r->msg)(MANDOCERR_SCOPEEXIT, r->data, r->last->line,
282: r->last->col, NULL));
1.67 kristaps 283: }
284:
285:
286: /*
287: * Parse a roff node's type from the input buffer. This must be in the
288: * form of ".foo xxx" in the usual way.
289: */
290: static enum rofft
291: roff_parse(const char *buf, int *pos)
292: {
293: int j;
294: char mac[5];
295: enum rofft t;
296:
1.75 kristaps 297: assert(ROFF_CTL(buf[*pos]));
298: (*pos)++;
1.67 kristaps 299:
300: while (buf[*pos] && (' ' == buf[*pos] || '\t' == buf[*pos]))
301: (*pos)++;
302:
303: if ('\0' == buf[*pos])
304: return(ROFF_MAX);
305:
306: for (j = 0; j < 4; j++, (*pos)++)
307: if ('\0' == (mac[j] = buf[*pos]))
308: break;
309: else if (' ' == buf[*pos])
310: break;
311:
312: if (j == 4 || j < 1)
313: return(ROFF_MAX);
314:
315: mac[j] = '\0';
316:
317: if (ROFF_MAX == (t = roff_hash_find(mac)))
318: return(t);
319:
320: while (buf[*pos] && ' ' == buf[*pos])
321: (*pos)++;
322:
323: return(t);
324: }
325:
326:
327: /* ARGSUSED */
328: static enum rofferr
1.76 kristaps 329: roff_cblock(ROFF_ARGS)
1.67 kristaps 330: {
331:
1.79 kristaps 332: /*
333: * A block-close `..' should only be invoked as a child of an
334: * ignore macro, otherwise raise a warning and just ignore it.
335: */
336:
1.76 kristaps 337: if (NULL == r->last) {
338: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
339: return(ROFF_ERR);
340: return(ROFF_IGN);
341: }
1.67 kristaps 342:
1.81 ! kristaps 343: switch (r->last->tok) {
! 344: case (ROFF_am):
! 345: /* FALLTHROUGH */
! 346: case (ROFF_ami):
! 347: /* FALLTHROUGH */
! 348: case (ROFF_am1):
! 349: /* FALLTHROUGH */
! 350: case (ROFF_de):
! 351: /* FALLTHROUGH */
! 352: case (ROFF_dei):
! 353: /* FALLTHROUGH */
! 354: case (ROFF_de1):
! 355: /* FALLTHROUGH */
! 356: case (ROFF_ig):
! 357: break;
! 358: default:
1.76 kristaps 359: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
360: return(ROFF_ERR);
1.67 kristaps 361: return(ROFF_IGN);
1.76 kristaps 362: }
1.67 kristaps 363:
1.76 kristaps 364: if ((*bufp)[pos])
365: if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
366: return(ROFF_ERR);
1.71 kristaps 367:
368: roffnode_pop(r);
1.76 kristaps 369: roffnode_cleanscope(r);
370: return(ROFF_IGN);
1.71 kristaps 371:
1.67 kristaps 372: }
373:
374:
1.76 kristaps 375: static void
376: roffnode_cleanscope(struct roff *r)
1.67 kristaps 377: {
378:
1.76 kristaps 379: while (r->last) {
380: if (--r->last->endspan < 0)
381: break;
382: roffnode_pop(r);
383: }
1.67 kristaps 384: }
385:
386:
1.75 kristaps 387: /* ARGSUSED */
1.74 kristaps 388: static enum rofferr
1.75 kristaps 389: roff_ccond(ROFF_ARGS)
1.74 kristaps 390: {
391:
1.76 kristaps 392: if (NULL == r->last) {
393: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
394: return(ROFF_ERR);
395: return(ROFF_IGN);
396: }
397:
398: if (ROFF_if != r->last->tok) {
1.75 kristaps 399: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
400: return(ROFF_ERR);
401: return(ROFF_IGN);
402: }
403:
1.76 kristaps 404: if (r->last->endspan > -1) {
405: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
406: return(ROFF_ERR);
407: return(ROFF_IGN);
408: }
409:
410: if ((*bufp)[pos])
411: if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
412: return(ROFF_ERR);
413:
1.75 kristaps 414: roffnode_pop(r);
1.76 kristaps 415: roffnode_cleanscope(r);
416: return(ROFF_IGN);
417: }
418:
1.75 kristaps 419:
1.76 kristaps 420: /* ARGSUSED */
421: static enum rofferr
1.80 kristaps 422: roff_block(ROFF_ARGS)
1.76 kristaps 423: {
1.78 kristaps 424: int sv;
425: size_t sz;
1.76 kristaps 426:
1.80 kristaps 427: if (ROFF_ig != tok && '\0' == (*bufp)[pos]) {
428: if ( ! (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL))
429: return(ROFF_ERR);
430: return(ROFF_IGN);
431: } else if (ROFF_ig != tok) {
432: while ((*bufp)[pos] && ' ' != (*bufp)[pos])
433: pos++;
434: while (' ' == (*bufp)[pos])
435: pos++;
436: }
437:
1.76 kristaps 438: if ( ! roffnode_push(r, tok, ln, ppos))
439: return(ROFF_ERR);
440:
1.79 kristaps 441: if ('\0' == (*bufp)[pos])
1.78 kristaps 442: return(ROFF_IGN);
443:
444: sv = pos;
445: while ((*bufp)[pos] && ' ' != (*bufp)[pos] &&
446: '\t' != (*bufp)[pos])
447: pos++;
448:
449: /*
450: * Note: groff does NOT like escape characters in the input.
451: * Instead of detecting this, we're just going to let it fly and
452: * to hell with it.
453: */
454:
455: assert(pos > sv);
456: sz = (size_t)(pos - sv);
457:
1.79 kristaps 458: if (1 == sz && '.' == (*bufp)[sv])
459: return(ROFF_IGN);
460:
1.78 kristaps 461: r->last->end = malloc(sz + 1);
462:
463: if (NULL == r->last->end) {
464: (*r->msg)(MANDOCERR_MEM, r->data, ln, pos, NULL);
465: return(ROFF_ERR);
466: }
467:
468: memcpy(r->last->end, *bufp + sv, sz);
469: r->last->end[(int)sz] = '\0';
470:
1.77 kristaps 471: if ((*bufp)[pos])
472: if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
473: return(ROFF_ERR);
1.74 kristaps 474:
1.78 kristaps 475: return(ROFF_IGN);
476: }
477:
478:
479: /* ARGSUSED */
480: static enum rofferr
1.79 kristaps 481: roff_if_sub(ROFF_ARGS)
482: {
483: enum rofft t;
484: enum roffrule rr;
485:
486: ppos = pos;
487: rr = r->last->rule;
488: roffnode_cleanscope(r);
489:
490: if (ROFF_MAX == (t = roff_parse(*bufp, &pos)))
491: return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
492:
493: /*
494: * A denied conditional must evaluate its children if and only
495: * if they're either structurally required (such as loops and
496: * conditionals) or a closing macro.
497: */
498: if (ROFFRULE_DENY == rr)
499: if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
500: if (ROFF_ccond != t)
501: return(ROFF_IGN);
502:
503: assert(roffs[t].proc);
504: return((*roffs[t].proc)
505: (r, t, bufp, szp, ln, ppos, pos, offs));
506: }
507:
508:
509: /* ARGSUSED */
510: static enum rofferr
1.80 kristaps 511: roff_block_sub(ROFF_ARGS)
1.79 kristaps 512: {
513: enum rofft t;
514: int i, j;
515:
516: /*
517: * First check whether a custom macro exists at this level. If
518: * it does, then check against it. This is some of groff's
519: * stranger behaviours. If we encountered a custom end-scope
520: * tag and that tag also happens to be a "real" macro, then we
521: * need to try interpreting it again as a real macro. If it's
522: * not, then return ignore. Else continue.
523: */
524:
525: if (r->last->end) {
526: i = pos + 1;
527: while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
528: i++;
529:
530: for (j = 0; r->last->end[j]; j++, i++)
531: if ((*bufp)[i] != r->last->end[j])
532: break;
533:
534: if ('\0' == r->last->end[j] &&
535: ('\0' == (*bufp)[i] ||
536: ' ' == (*bufp)[i] ||
537: '\t' == (*bufp)[i])) {
538: roffnode_pop(r);
539: roffnode_cleanscope(r);
540:
541: if (ROFF_MAX != roff_parse(*bufp, &pos))
542: return(ROFF_RERUN);
543: return(ROFF_IGN);
544: }
545: }
546:
547: /*
548: * If we have no custom end-query or lookup failed, then try
549: * pulling it out of the hashtable.
550: */
551:
552: ppos = pos;
553: t = roff_parse(*bufp, &pos);
554:
555: /* If we're not a comment-end, then throw it away. */
556: if (ROFF_cblock != t)
557: return(ROFF_IGN);
558:
559: assert(roffs[t].proc);
560: return((*roffs[t].proc)(r, t, bufp,
561: szp, ln, ppos, pos, offs));
562: }
563:
564:
565: /* ARGSUSED */
566: static enum rofferr
1.80 kristaps 567: roff_block_text(ROFF_ARGS)
1.78 kristaps 568: {
569:
570: return(ROFF_IGN);
571: }
572:
573:
574: /* ARGSUSED */
575: static enum rofferr
576: roff_if_text(ROFF_ARGS)
577: {
578: char *ep, *st;
579:
580: st = &(*bufp)[pos];
581: if (NULL == (ep = strstr(st, "\\}"))) {
582: roffnode_cleanscope(r);
583: return(ROFF_IGN);
584: }
585:
1.79 kristaps 586: if (ep > st && '\\' != *(ep - 1))
1.78 kristaps 587: roffnode_pop(r);
588:
589: roffnode_cleanscope(r);
1.74 kristaps 590: return(ROFF_IGN);
591: }
592:
593:
1.75 kristaps 594: /* ARGSUSED */
1.74 kristaps 595: static enum rofferr
1.75 kristaps 596: roff_if(ROFF_ARGS)
1.74 kristaps 597: {
1.77 kristaps 598: int sv;
1.74 kristaps 599:
600: /*
601: * Read ahead past the conditional.
602: * FIXME: this does not work, as conditionals don't end on
603: * whitespace, but are parsed according to a formal grammar.
604: * It's good enough for now, however.
605: */
606:
1.75 kristaps 607: while ((*bufp)[pos] && ' ' != (*bufp)[pos])
608: pos++;
1.77 kristaps 609:
610: sv = pos;
1.75 kristaps 611: while (' ' == (*bufp)[pos])
612: pos++;
1.74 kristaps 613:
1.77 kristaps 614: /*
615: * Roff is weird. If we have just white-space after the
616: * conditional, it's considered the BODY and we exit without
617: * really doing anything. Warn about this. It's probably
618: * wrong.
619: */
620:
621: if ('\0' == (*bufp)[pos] && sv != pos) {
622: if ( ! (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL))
623: return(ROFF_ERR);
624: return(ROFF_IGN);
625: }
626:
627: if ( ! roffnode_push(r, tok, ln, ppos))
628: return(ROFF_ERR);
629:
1.74 kristaps 630: /* Don't evaluate: just assume NO. */
631:
1.75 kristaps 632: r->last->endspan = 1;
633:
634: if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
635: r->last->endspan = -1;
636: pos += 2;
1.79 kristaps 637: }
1.74 kristaps 638:
1.77 kristaps 639: /*
640: * If there are no arguments on the line, the next-line scope is
641: * assumed.
642: */
643:
1.75 kristaps 644: if ('\0' == (*bufp)[pos])
645: return(ROFF_IGN);
1.77 kristaps 646:
647: /* Otherwise re-run the roff parser after recalculating. */
1.74 kristaps 648:
1.75 kristaps 649: *offs = pos;
650: return(ROFF_RERUN);
1.74 kristaps 651: }
CVSweb