Annotation of mandoc/roff.c, Revision 1.80
1.80 ! kristaps 1: /* $Id: roff.c,v 1.79 2010/05/17 00:06:36 kristaps Exp $ */
1.1 kristaps 2: /*
1.67 kristaps 3: * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
1.1 kristaps 4: *
5: * Permission to use, copy, modify, and distribute this software for any
1.66 kristaps 6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
1.1 kristaps 8: *
1.66 kristaps 9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.1 kristaps 16: */
1.66 kristaps 17: #ifdef HAVE_CONFIG_H
18: #include "config.h"
19: #endif
1.30 kristaps 20:
1.67 kristaps 21: #include <assert.h>
1.1 kristaps 22: #include <stdlib.h>
1.67 kristaps 23: #include <string.h>
1.75 kristaps 24: #include <stdio.h>
1.1 kristaps 25:
1.67 kristaps 26: #include "mandoc.h"
1.43 kristaps 27: #include "roff.h"
1.33 kristaps 28:
/* Non-zero if `c' is one of the two roff control characters, `.' or `''. */
#define	ROFF_CTL(c) \
	((c) == '.' || (c) == '\'')
31:
/*
 * Tokens for the roff requests known to this parser.  The values are
 * used as indices into the roffs table, so the order of the enumerators
 * has to follow the order of the table rows.
 */
enum	rofft {
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_if,
	ROFF_ig,
	ROFF_cblock,	/* the "." row of roffs */
	ROFF_ccond,	/* the "\\}" row of roffs */
#if 0
	ROFF_ie,
	ROFF_el,
#endif
	ROFF_MAX	/* token count; doubles as "no such macro" */
};
49:
50: struct roff {
51: struct roffnode *last; /* leaf of stack */
52: mandocmsg msg; /* err/warn/fatal messages */
53: void *data; /* privdata for messages */
54: };
55:
/* Whether the children of a scope are to be processed or discarded. */
enum	roffrule {
	ROFFRULE_ALLOW,
	ROFFRULE_DENY
};
60:
1.67 kristaps 61: struct roffnode {
62: enum rofft tok; /* type of node */
63: struct roffnode *parent; /* up one in stack */
64: int line; /* parse line */
65: int col; /* parse col */
1.79 kristaps 66: char *end; /* end-rules: custom token */
67: int endspan; /* end-rules: next-line or infty */
68: enum roffrule rule;
1.67 kristaps 69: };
70:
/*
 * Common parameter list shared by every request handler, and the
 * function-pointer type built from it.
 */
#define	ROFF_ARGS	struct roff *r,	/* parser context */ \
			enum rofft tok,	/* token of the request */ \
			char **bufp,	/* the input buffer */ \
			size_t *szp,	/* size of the input buffer */ \
			int ln,		/* line being parsed */ \
			int ppos,	/* position the request began at */ \
			int pos,	/* position reached so far */ \
			int *offs	/* out: offset at which to re-run */

typedef	enum rofferr (*roffproc)(ROFF_ARGS);
81:
82: struct roffmac {
83: const char *name; /* macro name */
1.79 kristaps 84: roffproc proc; /* process new macro */
85: roffproc text; /* process as child text of macro */
86: roffproc sub; /* process as child of macro */
87: int flags;
88: #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
1.67 kristaps 89: };
90:
/* Handlers for block requests and the `..' that closes them. */
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);

/* Handlers for conditionals and the `\}' that closes them. */
static	enum rofferr	 roff_if(ROFF_ARGS);
static	enum rofferr	 roff_if_sub(ROFF_ARGS);
static	enum rofferr	 roff_if_text(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);
1.67 kristaps 99:
100: const struct roffmac roffs[ROFF_MAX] = {
1.80 ! kristaps 101: { "am", roff_block, roff_block_text, roff_block_sub, 0 },
! 102: { "ami", roff_block, roff_block_text, roff_block_sub, 0 },
! 103: { "am1", roff_block, roff_block_text, roff_block_sub, 0 },
! 104: { "de", roff_block, roff_block_text, roff_block_sub, 0 },
! 105: { "dei", roff_block, roff_block_text, roff_block_sub, 0 },
! 106: { "de1", roff_block, roff_block_text, roff_block_sub, 0 },
1.79 kristaps 107: { "if", roff_if, roff_if_text, roff_if_sub, ROFFMAC_STRUCT },
1.80 ! kristaps 108: { "ig", roff_block, roff_block_text, roff_block_sub, 0 },
1.79 kristaps 109: { ".", roff_cblock, NULL, NULL, 0 },
110: { "\\}", roff_ccond, NULL, NULL, 0 },
1.67 kristaps 111: };
112:
/* Scope-stack maintenance. */
static	int		 roffnode_push(struct roff *,
				enum rofft, int, int);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roff_free1(struct roff *);

/* Request-name lookup. */
static	enum rofft	 roff_hash_find(const char *);
static	enum rofft	 roff_parse(const char *, int *);
120:
121:
122: /*
123: * Look up a roff token by its name. Returns ROFF_MAX if no macro by
124: * the nil-terminated string name could be found.
125: */
126: static enum rofft
127: roff_hash_find(const char *p)
128: {
129: int i;
130:
131: /* FIXME: make this be fast and efficient. */
132:
133: for (i = 0; i < (int)ROFF_MAX; i++)
134: if (0 == strcmp(roffs[i].name, p))
135: return((enum rofft)i);
136:
137: return(ROFF_MAX);
138: }
139:
140:
141: /*
142: * Pop the current node off of the stack of roff instructions currently
143: * pending.
144: */
145: static void
146: roffnode_pop(struct roff *r)
147: {
148: struct roffnode *p;
149:
1.75 kristaps 150: assert(r->last);
151: p = r->last;
152: r->last = r->last->parent;
1.74 kristaps 153: if (p->end)
154: free(p->end);
1.67 kristaps 155: free(p);
156: }
157:
158:
159: /*
160: * Push a roff node onto the instruction stack. This must later be
161: * removed with roffnode_pop().
162: */
163: static int
164: roffnode_push(struct roff *r, enum rofft tok, int line, int col)
165: {
166: struct roffnode *p;
167:
168: if (NULL == (p = calloc(1, sizeof(struct roffnode)))) {
169: (*r->msg)(MANDOCERR_MEM, r->data, line, col, NULL);
170: return(0);
171: }
172:
173: p->tok = tok;
174: p->parent = r->last;
175: p->line = line;
176: p->col = col;
1.79 kristaps 177: p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
1.67 kristaps 178:
179: r->last = p;
180: return(1);
181: }
182:
183:
184: static void
185: roff_free1(struct roff *r)
186: {
187:
188: while (r->last)
189: roffnode_pop(r);
190: }
191:
192:
/*
 * Drop all open scopes; the context itself is kept.
 */
void
roff_reset(struct roff *r)
{

	roff_free1(r);
}
199:
200:
/*
 * Drop all open scopes and then release the context itself.
 */
void
roff_free(struct roff *r)
{

	roff_free1(r);
	free(r);
}
208:
209:
210: struct roff *
211: roff_alloc(const mandocmsg msg, void *data)
212: {
213: struct roff *r;
214:
215: if (NULL == (r = calloc(1, sizeof(struct roff)))) {
216: (*msg)(MANDOCERR_MEM, data, 0, 0, NULL);
217: return(0);
218: }
219:
220: r->msg = msg;
221: r->data = data;
222: return(r);
223: }
224:
225:
226: enum rofferr
1.74 kristaps 227: roff_parseln(struct roff *r, int ln,
228: char **bufp, size_t *szp, int pos, int *offs)
1.67 kristaps 229: {
230: enum rofft t;
1.79 kristaps 231: int ppos;
232:
233: /*
234: * First, if a scope is open and we're not a macro, pass the
235: * text through the macro's filter. If a scope isn't open and
236: * we're not a macro, just let it through.
237: */
1.74 kristaps 238:
1.75 kristaps 239: if (r->last && ! ROFF_CTL((*bufp)[pos])) {
1.78 kristaps 240: t = r->last->tok;
241: assert(roffs[t].text);
242: return((*roffs[t].text)
243: (r, t, bufp, szp, ln, pos, pos, offs));
1.75 kristaps 244: } else if ( ! ROFF_CTL((*bufp)[pos]))
1.67 kristaps 245: return(ROFF_CONT);
246:
1.79 kristaps 247: /*
248: * If a scope is open, go to the child handler for that macro,
249: * as it may want to preprocess before doing anything with it.
250: */
1.78 kristaps 251:
1.79 kristaps 252: if (r->last) {
253: t = r->last->tok;
254: assert(roffs[t].sub);
255: return((*roffs[t].sub)
256: (r, t, bufp, szp, ln, pos, pos, offs));
257: }
1.78 kristaps 258:
1.79 kristaps 259: /*
260: * Lastly, as we've no scope open, try to look up and execute
261: * the new macro. If no macro is found, simply return and let
262: * the compilers handle it.
263: */
1.67 kristaps 264:
1.75 kristaps 265: ppos = pos;
1.79 kristaps 266: if (ROFF_MAX == (t = roff_parse(*bufp, &pos)))
267: return(ROFF_CONT);
1.67 kristaps 268:
1.75 kristaps 269: assert(roffs[t].proc);
1.78 kristaps 270: return((*roffs[t].proc)
271: (r, t, bufp, szp, ln, ppos, pos, offs));
1.74 kristaps 272: }
273:
274:
275: int
276: roff_endparse(struct roff *r)
277: {
278:
279: if (NULL == r->last)
280: return(1);
281: return((*r->msg)(MANDOCERR_SCOPEEXIT, r->data, r->last->line,
282: r->last->col, NULL));
1.67 kristaps 283: }
284:
285:
286: /*
287: * Parse a roff node's type from the input buffer. This must be in the
288: * form of ".foo xxx" in the usual way.
289: */
290: static enum rofft
291: roff_parse(const char *buf, int *pos)
292: {
293: int j;
294: char mac[5];
295: enum rofft t;
296:
1.75 kristaps 297: assert(ROFF_CTL(buf[*pos]));
298: (*pos)++;
1.67 kristaps 299:
300: while (buf[*pos] && (' ' == buf[*pos] || '\t' == buf[*pos]))
301: (*pos)++;
302:
303: if ('\0' == buf[*pos])
304: return(ROFF_MAX);
305:
306: for (j = 0; j < 4; j++, (*pos)++)
307: if ('\0' == (mac[j] = buf[*pos]))
308: break;
309: else if (' ' == buf[*pos])
310: break;
311:
312: if (j == 4 || j < 1)
313: return(ROFF_MAX);
314:
315: mac[j] = '\0';
316:
317: if (ROFF_MAX == (t = roff_hash_find(mac)))
318: return(t);
319:
320: while (buf[*pos] && ' ' == buf[*pos])
321: (*pos)++;
322:
323: return(t);
324: }
325:
326:
327: /* ARGSUSED */
328: static enum rofferr
1.76 kristaps 329: roff_cblock(ROFF_ARGS)
1.67 kristaps 330: {
331:
1.79 kristaps 332: /*
333: * A block-close `..' should only be invoked as a child of an
334: * ignore macro, otherwise raise a warning and just ignore it.
335: */
336:
1.76 kristaps 337: if (NULL == r->last) {
338: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
339: return(ROFF_ERR);
340: return(ROFF_IGN);
341: }
1.67 kristaps 342:
1.76 kristaps 343: if (ROFF_ig != r->last->tok) {
344: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
345: return(ROFF_ERR);
1.67 kristaps 346: return(ROFF_IGN);
1.76 kristaps 347: }
1.67 kristaps 348:
1.76 kristaps 349: if ((*bufp)[pos])
350: if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
351: return(ROFF_ERR);
1.71 kristaps 352:
353: roffnode_pop(r);
1.76 kristaps 354: roffnode_cleanscope(r);
355: return(ROFF_IGN);
1.71 kristaps 356:
1.67 kristaps 357: }
358:
359:
1.76 kristaps 360: static void
361: roffnode_cleanscope(struct roff *r)
1.67 kristaps 362: {
363:
1.76 kristaps 364: while (r->last) {
365: if (--r->last->endspan < 0)
366: break;
367: roffnode_pop(r);
368: }
1.67 kristaps 369: }
370:
371:
1.75 kristaps 372: /* ARGSUSED */
1.74 kristaps 373: static enum rofferr
1.75 kristaps 374: roff_ccond(ROFF_ARGS)
1.74 kristaps 375: {
376:
1.76 kristaps 377: if (NULL == r->last) {
378: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
379: return(ROFF_ERR);
380: return(ROFF_IGN);
381: }
382:
383: if (ROFF_if != r->last->tok) {
1.75 kristaps 384: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
385: return(ROFF_ERR);
386: return(ROFF_IGN);
387: }
388:
1.76 kristaps 389: if (r->last->endspan > -1) {
390: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
391: return(ROFF_ERR);
392: return(ROFF_IGN);
393: }
394:
395: if ((*bufp)[pos])
396: if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
397: return(ROFF_ERR);
398:
1.75 kristaps 399: roffnode_pop(r);
1.76 kristaps 400: roffnode_cleanscope(r);
401: return(ROFF_IGN);
402: }
403:
1.75 kristaps 404:
1.76 kristaps 405: /* ARGSUSED */
406: static enum rofferr
1.80 ! kristaps 407: roff_block(ROFF_ARGS)
1.76 kristaps 408: {
1.78 kristaps 409: int sv;
410: size_t sz;
1.76 kristaps 411:
1.80 ! kristaps 412: if (ROFF_ig != tok && '\0' == (*bufp)[pos]) {
! 413: if ( ! (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL))
! 414: return(ROFF_ERR);
! 415: return(ROFF_IGN);
! 416: } else if (ROFF_ig != tok) {
! 417: while ((*bufp)[pos] && ' ' != (*bufp)[pos])
! 418: pos++;
! 419: while (' ' == (*bufp)[pos])
! 420: pos++;
! 421: }
! 422:
1.76 kristaps 423: if ( ! roffnode_push(r, tok, ln, ppos))
424: return(ROFF_ERR);
425:
1.79 kristaps 426: if ('\0' == (*bufp)[pos])
1.78 kristaps 427: return(ROFF_IGN);
428:
429: sv = pos;
430: while ((*bufp)[pos] && ' ' != (*bufp)[pos] &&
431: '\t' != (*bufp)[pos])
432: pos++;
433:
434: /*
435: * Note: groff does NOT like escape characters in the input.
436: * Instead of detecting this, we're just going to let it fly and
437: * to hell with it.
438: */
439:
440: assert(pos > sv);
441: sz = (size_t)(pos - sv);
442:
1.79 kristaps 443: if (1 == sz && '.' == (*bufp)[sv])
444: return(ROFF_IGN);
445:
1.78 kristaps 446: r->last->end = malloc(sz + 1);
447:
448: if (NULL == r->last->end) {
449: (*r->msg)(MANDOCERR_MEM, r->data, ln, pos, NULL);
450: return(ROFF_ERR);
451: }
452:
453: memcpy(r->last->end, *bufp + sv, sz);
454: r->last->end[(int)sz] = '\0';
455:
1.77 kristaps 456: if ((*bufp)[pos])
457: if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
458: return(ROFF_ERR);
1.74 kristaps 459:
1.78 kristaps 460: return(ROFF_IGN);
461: }
462:
463:
464: /* ARGSUSED */
465: static enum rofferr
1.79 kristaps 466: roff_if_sub(ROFF_ARGS)
467: {
468: enum rofft t;
469: enum roffrule rr;
470:
471: ppos = pos;
472: rr = r->last->rule;
473: roffnode_cleanscope(r);
474:
475: if (ROFF_MAX == (t = roff_parse(*bufp, &pos)))
476: return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
477:
478: /*
479: * A denied conditional must evaluate its children if and only
480: * if they're either structurally required (such as loops and
481: * conditionals) or a closing macro.
482: */
483: if (ROFFRULE_DENY == rr)
484: if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
485: if (ROFF_ccond != t)
486: return(ROFF_IGN);
487:
488: assert(roffs[t].proc);
489: return((*roffs[t].proc)
490: (r, t, bufp, szp, ln, ppos, pos, offs));
491: }
492:
493:
494: /* ARGSUSED */
495: static enum rofferr
1.80 ! kristaps 496: roff_block_sub(ROFF_ARGS)
1.79 kristaps 497: {
498: enum rofft t;
499: int i, j;
500:
501: /*
502: * First check whether a custom macro exists at this level. If
503: * it does, then check against it. This is some of groff's
504: * stranger behaviours. If we encountered a custom end-scope
505: * tag and that tag also happens to be a "real" macro, then we
506: * need to try interpreting it again as a real macro. If it's
507: * not, then return ignore. Else continue.
508: */
509:
510: if (r->last->end) {
511: i = pos + 1;
512: while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
513: i++;
514:
515: for (j = 0; r->last->end[j]; j++, i++)
516: if ((*bufp)[i] != r->last->end[j])
517: break;
518:
519: if ('\0' == r->last->end[j] &&
520: ('\0' == (*bufp)[i] ||
521: ' ' == (*bufp)[i] ||
522: '\t' == (*bufp)[i])) {
523: roffnode_pop(r);
524: roffnode_cleanscope(r);
525:
526: if (ROFF_MAX != roff_parse(*bufp, &pos))
527: return(ROFF_RERUN);
528: return(ROFF_IGN);
529: }
530: }
531:
532: /*
533: * If we have no custom end-query or lookup failed, then try
534: * pulling it out of the hashtable.
535: */
536:
537: ppos = pos;
538: t = roff_parse(*bufp, &pos);
539:
540: /* If we're not a comment-end, then throw it away. */
541: if (ROFF_cblock != t)
542: return(ROFF_IGN);
543:
544: assert(roffs[t].proc);
545: return((*roffs[t].proc)(r, t, bufp,
546: szp, ln, ppos, pos, offs));
547: }
548:
549:
550: /* ARGSUSED */
551: static enum rofferr
1.80 ! kristaps 552: roff_block_text(ROFF_ARGS)
1.78 kristaps 553: {
554:
555: return(ROFF_IGN);
556: }
557:
558:
559: /* ARGSUSED */
560: static enum rofferr
561: roff_if_text(ROFF_ARGS)
562: {
563: char *ep, *st;
564:
565: st = &(*bufp)[pos];
566: if (NULL == (ep = strstr(st, "\\}"))) {
567: roffnode_cleanscope(r);
568: return(ROFF_IGN);
569: }
570:
1.79 kristaps 571: if (ep > st && '\\' != *(ep - 1))
1.78 kristaps 572: roffnode_pop(r);
573:
574: roffnode_cleanscope(r);
1.74 kristaps 575: return(ROFF_IGN);
576: }
577:
578:
1.75 kristaps 579: /* ARGSUSED */
1.74 kristaps 580: static enum rofferr
1.75 kristaps 581: roff_if(ROFF_ARGS)
1.74 kristaps 582: {
1.77 kristaps 583: int sv;
1.74 kristaps 584:
585: /*
586: * Read ahead past the conditional.
587: * FIXME: this does not work, as conditionals don't end on
588: * whitespace, but are parsed according to a formal grammar.
589: * It's good enough for now, however.
590: */
591:
1.75 kristaps 592: while ((*bufp)[pos] && ' ' != (*bufp)[pos])
593: pos++;
1.77 kristaps 594:
595: sv = pos;
1.75 kristaps 596: while (' ' == (*bufp)[pos])
597: pos++;
1.74 kristaps 598:
1.77 kristaps 599: /*
600: * Roff is weird. If we have just white-space after the
601: * conditional, it's considered the BODY and we exit without
602: * really doing anything. Warn about this. It's probably
603: * wrong.
604: */
605:
606: if ('\0' == (*bufp)[pos] && sv != pos) {
607: if ( ! (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL))
608: return(ROFF_ERR);
609: return(ROFF_IGN);
610: }
611:
612: if ( ! roffnode_push(r, tok, ln, ppos))
613: return(ROFF_ERR);
614:
1.74 kristaps 615: /* Don't evaluate: just assume NO. */
616:
1.75 kristaps 617: r->last->endspan = 1;
618:
619: if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
620: r->last->endspan = -1;
621: pos += 2;
1.79 kristaps 622: }
1.74 kristaps 623:
1.77 kristaps 624: /*
625: * If there are no arguments on the line, the next-line scope is
626: * assumed.
627: */
628:
1.75 kristaps 629: if ('\0' == (*bufp)[pos])
630: return(ROFF_IGN);
1.77 kristaps 631:
632: /* Otherwise re-run the roff parser after recalculating. */
1.74 kristaps 633:
1.75 kristaps 634: *offs = pos;
635: return(ROFF_RERUN);
1.74 kristaps 636: }
CVSweb