Annotation of mandoc/roff.c, Revision 1.79
1.79 ! kristaps 1: /* $Id: roff.c,v 1.78 2010/05/16 22:28:33 kristaps Exp $ */
1.1 kristaps 2: /*
1.67 kristaps 3: * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
1.1 kristaps 4: *
5: * Permission to use, copy, modify, and distribute this software for any
1.66 kristaps 6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
1.1 kristaps 8: *
1.66 kristaps 9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.1 kristaps 16: */
1.66 kristaps 17: #ifdef HAVE_CONFIG_H
18: #include "config.h"
19: #endif
1.30 kristaps 20:
1.67 kristaps 21: #include <assert.h>
1.1 kristaps 22: #include <stdlib.h>
1.67 kristaps 23: #include <string.h>
1.75 kristaps 24: #include <stdio.h>
1.1 kristaps 25:
1.67 kristaps 26: #include "mandoc.h"
1.43 kristaps 27: #include "roff.h"
1.33 kristaps 28:
/*
 * Non-zero when character c is one of the two roff control characters
 * (`.' or `'') that open a request/macro line.  Note that c is
 * evaluated twice.
 */
#define	ROFF_CTL(c) \
	((c) == '.' || (c) == '\'')
31:
/*
 * Tokens for the roff requests known to this file.  The enumerator
 * values index the roffs table, so the order here must match the order
 * of the table entries.  ROFF_MAX is the number of active tokens and
 * also serves as the "no such token" value.
 */
enum	rofft {
	ROFF_if,
	ROFF_ig,
	ROFF_cblock,
	ROFF_ccond,
#if 0
	/* Disabled: no handlers for these exist in the roffs table. */
	ROFF_am,
	ROFF_ami,
	ROFF_de,
	ROFF_dei,
	ROFF_ie,
	ROFF_el,
#endif
	ROFF_MAX
};
47:
48: struct roff {
49: struct roffnode *last; /* leaf of stack */
50: mandocmsg msg; /* err/warn/fatal messages */
51: void *data; /* privdata for messages */
52: };
53:
/*
 * Evaluation rule carried by every scope node: whether the content of
 * the scope is let through (ALLOW) or discarded (DENY).
 */
enum	roffrule {
	ROFFRULE_ALLOW,
	ROFFRULE_DENY
};
! 58:
1.67 kristaps 59: struct roffnode {
60: enum rofft tok; /* type of node */
61: struct roffnode *parent; /* up one in stack */
62: int line; /* parse line */
63: int col; /* parse col */
1.79 ! kristaps 64: char *end; /* end-rules: custom token */
! 65: int endspan; /* end-rules: next-line or infty */
! 66: enum roffrule rule;
1.67 kristaps 67: };
68:
/*
 * Common parameter list shared by every request handler.  The handler
 * bodies refer to these names directly, so they must not be renamed.
 */
#define	ROFF_ARGS \
	struct roff *r,		/* parse context */ \
	enum rofft tok,		/* token of the macro being handled */ \
	char **bufp,		/* input buffer */ \
	size_t *szp,		/* size of the input buffer */ \
	int ln,			/* line number being parsed */ \
	int ppos,		/* original position in the buffer */ \
	int pos,		/* current position in the buffer */ \
	int *offs		/* out: offset at which to re-run */

/* Signature of a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
79:
80: struct roffmac {
81: const char *name; /* macro name */
1.79 ! kristaps 82: roffproc proc; /* process new macro */
! 83: roffproc text; /* process as child text of macro */
! 84: roffproc sub; /* process as child of macro */
! 85: int flags;
! 86: #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
1.67 kristaps 87: };
88:
/* Handlers run when a request is encountered (roffmac.proc). */
static	enum rofferr	 roff_if(ROFF_ARGS);
static	enum rofferr	 roff_ig(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);

/* Handlers for text lines inside an open scope (roffmac.text). */
static	enum rofferr	 roff_if_text(ROFF_ARGS);
static	enum rofferr	 roff_ig_text(ROFF_ARGS);

/* Handlers for control lines inside an open scope (roffmac.sub). */
static	enum rofferr	 roff_if_sub(ROFF_ARGS);
static	enum rofferr	 roff_ig_sub(ROFF_ARGS);
1.67 kristaps 97:
98: const struct roffmac roffs[ROFF_MAX] = {
1.79 ! kristaps 99: { "if", roff_if, roff_if_text, roff_if_sub, ROFFMAC_STRUCT },
! 100: { "ig", roff_ig, roff_ig_text, roff_ig_sub, 0 },
! 101: { ".", roff_cblock, NULL, NULL, 0 },
! 102: { "\\}", roff_ccond, NULL, NULL, 0 },
1.67 kristaps 103: };
104:
/* Token lookup and parsing. */
static	enum rofft	 roff_hash_find(const char *);
static	enum rofft	 roff_parse(const char *, int *);

/* Scope-stack maintenance. */
static	int		 roffnode_push(struct roff *,
				enum rofft, int, int);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roff_free1(struct roff *);
112:
113:
114: /*
115: * Look up a roff token by its name. Returns ROFF_MAX if no macro by
116: * the nil-terminated string name could be found.
117: */
118: static enum rofft
119: roff_hash_find(const char *p)
120: {
121: int i;
122:
123: /* FIXME: make this be fast and efficient. */
124:
125: for (i = 0; i < (int)ROFF_MAX; i++)
126: if (0 == strcmp(roffs[i].name, p))
127: return((enum rofft)i);
128:
129: return(ROFF_MAX);
130: }
131:
132:
133: /*
134: * Pop the current node off of the stack of roff instructions currently
135: * pending.
136: */
137: static void
138: roffnode_pop(struct roff *r)
139: {
140: struct roffnode *p;
141:
1.75 kristaps 142: assert(r->last);
143: p = r->last;
144: r->last = r->last->parent;
1.74 kristaps 145: if (p->end)
146: free(p->end);
1.67 kristaps 147: free(p);
148: }
149:
150:
151: /*
152: * Push a roff node onto the instruction stack. This must later be
153: * removed with roffnode_pop().
154: */
155: static int
156: roffnode_push(struct roff *r, enum rofft tok, int line, int col)
157: {
158: struct roffnode *p;
159:
160: if (NULL == (p = calloc(1, sizeof(struct roffnode)))) {
161: (*r->msg)(MANDOCERR_MEM, r->data, line, col, NULL);
162: return(0);
163: }
164:
165: p->tok = tok;
166: p->parent = r->last;
167: p->line = line;
168: p->col = col;
1.79 ! kristaps 169: p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
1.67 kristaps 170:
171: r->last = p;
172: return(1);
173: }
174:
175:
176: static void
177: roff_free1(struct roff *r)
178: {
179:
180: while (r->last)
181: roffnode_pop(r);
182: }
183:
184:
/*
 * Discard all open scopes.  The context itself, including its message
 * callback and private data, is left untouched.
 */
void
roff_reset(struct roff *r)
{

	roff_free1(r);
}
191:
192:
/*
 * Destroy a context obtained from roff_alloc(): release all open
 * scopes, then the context structure itself.  r must not be NULL.
 */
void
roff_free(struct roff *r)
{

	roff_free1(r);
	free(r);
}
200:
201:
202: struct roff *
203: roff_alloc(const mandocmsg msg, void *data)
204: {
205: struct roff *r;
206:
207: if (NULL == (r = calloc(1, sizeof(struct roff)))) {
208: (*msg)(MANDOCERR_MEM, data, 0, 0, NULL);
209: return(0);
210: }
211:
212: r->msg = msg;
213: r->data = data;
214: return(r);
215: }
216:
217:
218: enum rofferr
1.74 kristaps 219: roff_parseln(struct roff *r, int ln,
220: char **bufp, size_t *szp, int pos, int *offs)
1.67 kristaps 221: {
222: enum rofft t;
1.79 ! kristaps 223: int ppos;
! 224:
! 225: /*
! 226: * First, if a scope is open and we're not a macro, pass the
! 227: * text through the macro's filter. If a scope isn't open and
! 228: * we're not a macro, just let it through.
! 229: */
1.74 kristaps 230:
1.75 kristaps 231: if (r->last && ! ROFF_CTL((*bufp)[pos])) {
1.78 kristaps 232: t = r->last->tok;
233: assert(roffs[t].text);
234: return((*roffs[t].text)
235: (r, t, bufp, szp, ln, pos, pos, offs));
1.75 kristaps 236: } else if ( ! ROFF_CTL((*bufp)[pos]))
1.67 kristaps 237: return(ROFF_CONT);
238:
1.79 ! kristaps 239: /*
! 240: * If a scope is open, go to the child handler for that macro,
! 241: * as it may want to preprocess before doing anything with it.
! 242: */
1.78 kristaps 243:
1.79 ! kristaps 244: if (r->last) {
! 245: t = r->last->tok;
! 246: assert(roffs[t].sub);
! 247: return((*roffs[t].sub)
! 248: (r, t, bufp, szp, ln, pos, pos, offs));
! 249: }
1.78 kristaps 250:
1.79 ! kristaps 251: /*
! 252: * Lastly, as we've no scope open, try to look up and execute
! 253: * the new macro. If no macro is found, simply return and let
! 254: * the compilers handle it.
! 255: */
1.67 kristaps 256:
1.75 kristaps 257: ppos = pos;
1.79 ! kristaps 258: if (ROFF_MAX == (t = roff_parse(*bufp, &pos)))
! 259: return(ROFF_CONT);
1.67 kristaps 260:
1.75 kristaps 261: assert(roffs[t].proc);
1.78 kristaps 262: return((*roffs[t].proc)
263: (r, t, bufp, szp, ln, ppos, pos, offs));
1.74 kristaps 264: }
265:
266:
267: int
268: roff_endparse(struct roff *r)
269: {
270:
271: if (NULL == r->last)
272: return(1);
273: return((*r->msg)(MANDOCERR_SCOPEEXIT, r->data, r->last->line,
274: r->last->col, NULL));
1.67 kristaps 275: }
276:
277:
278: /*
279: * Parse a roff node's type from the input buffer. This must be in the
280: * form of ".foo xxx" in the usual way.
281: */
282: static enum rofft
283: roff_parse(const char *buf, int *pos)
284: {
285: int j;
286: char mac[5];
287: enum rofft t;
288:
1.75 kristaps 289: assert(ROFF_CTL(buf[*pos]));
290: (*pos)++;
1.67 kristaps 291:
292: while (buf[*pos] && (' ' == buf[*pos] || '\t' == buf[*pos]))
293: (*pos)++;
294:
295: if ('\0' == buf[*pos])
296: return(ROFF_MAX);
297:
298: for (j = 0; j < 4; j++, (*pos)++)
299: if ('\0' == (mac[j] = buf[*pos]))
300: break;
301: else if (' ' == buf[*pos])
302: break;
303:
304: if (j == 4 || j < 1)
305: return(ROFF_MAX);
306:
307: mac[j] = '\0';
308:
309: if (ROFF_MAX == (t = roff_hash_find(mac)))
310: return(t);
311:
312: while (buf[*pos] && ' ' == buf[*pos])
313: (*pos)++;
314:
315: return(t);
316: }
317:
318:
319: /* ARGSUSED */
320: static enum rofferr
1.76 kristaps 321: roff_cblock(ROFF_ARGS)
1.67 kristaps 322: {
323:
1.79 ! kristaps 324: /*
! 325: * A block-close `..' should only be invoked as a child of an
! 326: * ignore macro, otherwise raise a warning and just ignore it.
! 327: */
! 328:
1.76 kristaps 329: if (NULL == r->last) {
330: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
331: return(ROFF_ERR);
332: return(ROFF_IGN);
333: }
1.67 kristaps 334:
1.76 kristaps 335: if (ROFF_ig != r->last->tok) {
336: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
337: return(ROFF_ERR);
1.67 kristaps 338: return(ROFF_IGN);
1.76 kristaps 339: }
1.67 kristaps 340:
1.76 kristaps 341: if ((*bufp)[pos])
342: if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
343: return(ROFF_ERR);
1.71 kristaps 344:
345: roffnode_pop(r);
1.76 kristaps 346: roffnode_cleanscope(r);
347: return(ROFF_IGN);
1.71 kristaps 348:
1.67 kristaps 349: }
350:
351:
1.76 kristaps 352: static void
353: roffnode_cleanscope(struct roff *r)
1.67 kristaps 354: {
355:
1.76 kristaps 356: while (r->last) {
357: if (--r->last->endspan < 0)
358: break;
359: roffnode_pop(r);
360: }
1.67 kristaps 361: }
362:
363:
1.75 kristaps 364: /* ARGSUSED */
1.74 kristaps 365: static enum rofferr
1.75 kristaps 366: roff_ccond(ROFF_ARGS)
1.74 kristaps 367: {
368:
1.76 kristaps 369: if (NULL == r->last) {
370: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
371: return(ROFF_ERR);
372: return(ROFF_IGN);
373: }
374:
375: if (ROFF_if != r->last->tok) {
1.75 kristaps 376: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
377: return(ROFF_ERR);
378: return(ROFF_IGN);
379: }
380:
1.76 kristaps 381: if (r->last->endspan > -1) {
382: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
383: return(ROFF_ERR);
384: return(ROFF_IGN);
385: }
386:
387: if ((*bufp)[pos])
388: if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
389: return(ROFF_ERR);
390:
1.75 kristaps 391: roffnode_pop(r);
1.76 kristaps 392: roffnode_cleanscope(r);
393: return(ROFF_IGN);
394: }
395:
1.75 kristaps 396:
1.76 kristaps 397: /* ARGSUSED */
398: static enum rofferr
399: roff_ig(ROFF_ARGS)
400: {
1.78 kristaps 401: int sv;
402: size_t sz;
1.76 kristaps 403:
404: if ( ! roffnode_push(r, tok, ln, ppos))
405: return(ROFF_ERR);
406:
1.79 ! kristaps 407: if ('\0' == (*bufp)[pos])
1.78 kristaps 408: return(ROFF_IGN);
409:
410: sv = pos;
411: while ((*bufp)[pos] && ' ' != (*bufp)[pos] &&
412: '\t' != (*bufp)[pos])
413: pos++;
414:
415: /*
416: * Note: groff does NOT like escape characters in the input.
417: * Instead of detecting this, we're just going to let it fly and
418: * to hell with it.
419: */
420:
421: assert(pos > sv);
422: sz = (size_t)(pos - sv);
423:
1.79 ! kristaps 424: if (1 == sz && '.' == (*bufp)[sv])
! 425: return(ROFF_IGN);
! 426:
1.78 kristaps 427: r->last->end = malloc(sz + 1);
428:
429: if (NULL == r->last->end) {
430: (*r->msg)(MANDOCERR_MEM, r->data, ln, pos, NULL);
431: return(ROFF_ERR);
432: }
433:
434: memcpy(r->last->end, *bufp + sv, sz);
435: r->last->end[(int)sz] = '\0';
436:
1.77 kristaps 437: if ((*bufp)[pos])
438: if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
439: return(ROFF_ERR);
1.74 kristaps 440:
1.78 kristaps 441: return(ROFF_IGN);
442: }
443:
444:
445: /* ARGSUSED */
446: static enum rofferr
1.79 ! kristaps 447: roff_if_sub(ROFF_ARGS)
! 448: {
! 449: enum rofft t;
! 450: enum roffrule rr;
! 451:
! 452: ppos = pos;
! 453: rr = r->last->rule;
! 454: roffnode_cleanscope(r);
! 455:
! 456: if (ROFF_MAX == (t = roff_parse(*bufp, &pos)))
! 457: return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
! 458:
! 459: /*
! 460: * A denied conditional must evaluate its children if and only
! 461: * if they're either structurally required (such as loops and
! 462: * conditionals) or a closing macro.
! 463: */
! 464: if (ROFFRULE_DENY == rr)
! 465: if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
! 466: if (ROFF_ccond != t)
! 467: return(ROFF_IGN);
! 468:
! 469: assert(roffs[t].proc);
! 470: return((*roffs[t].proc)
! 471: (r, t, bufp, szp, ln, ppos, pos, offs));
! 472: }
! 473:
! 474:
! 475: /* ARGSUSED */
! 476: static enum rofferr
! 477: roff_ig_sub(ROFF_ARGS)
! 478: {
! 479: enum rofft t;
! 480: int i, j;
! 481:
! 482: /*
! 483: * First check whether a custom macro exists at this level. If
! 484: * it does, then check against it. This is some of groff's
! 485: * stranger behaviours. If we encountered a custom end-scope
! 486: * tag and that tag also happens to be a "real" macro, then we
! 487: * need to try interpreting it again as a real macro. If it's
! 488: * not, then return ignore. Else continue.
! 489: */
! 490:
! 491: if (r->last->end) {
! 492: i = pos + 1;
! 493: while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
! 494: i++;
! 495:
! 496: for (j = 0; r->last->end[j]; j++, i++)
! 497: if ((*bufp)[i] != r->last->end[j])
! 498: break;
! 499:
! 500: if ('\0' == r->last->end[j] &&
! 501: ('\0' == (*bufp)[i] ||
! 502: ' ' == (*bufp)[i] ||
! 503: '\t' == (*bufp)[i])) {
! 504: roffnode_pop(r);
! 505: roffnode_cleanscope(r);
! 506:
! 507: if (ROFF_MAX != roff_parse(*bufp, &pos))
! 508: return(ROFF_RERUN);
! 509: return(ROFF_IGN);
! 510: }
! 511: }
! 512:
! 513: /*
! 514: * If we have no custom end-query or lookup failed, then try
! 515: * pulling it out of the hashtable.
! 516: */
! 517:
! 518: ppos = pos;
! 519: t = roff_parse(*bufp, &pos);
! 520:
! 521: /* If we're not a comment-end, then throw it away. */
! 522: if (ROFF_cblock != t)
! 523: return(ROFF_IGN);
! 524:
! 525: assert(roffs[t].proc);
! 526: return((*roffs[t].proc)(r, t, bufp,
! 527: szp, ln, ppos, pos, offs));
! 528: }
! 529:
! 530:
! 531: /* ARGSUSED */
! 532: static enum rofferr
1.78 kristaps 533: roff_ig_text(ROFF_ARGS)
534: {
535:
536: return(ROFF_IGN);
537: }
538:
539:
540: /* ARGSUSED */
541: static enum rofferr
542: roff_if_text(ROFF_ARGS)
543: {
544: char *ep, *st;
545:
546: st = &(*bufp)[pos];
547: if (NULL == (ep = strstr(st, "\\}"))) {
548: roffnode_cleanscope(r);
549: return(ROFF_IGN);
550: }
551:
1.79 ! kristaps 552: if (ep > st && '\\' != *(ep - 1))
1.78 kristaps 553: roffnode_pop(r);
554:
555: roffnode_cleanscope(r);
1.74 kristaps 556: return(ROFF_IGN);
557: }
558:
559:
1.75 kristaps 560: /* ARGSUSED */
1.74 kristaps 561: static enum rofferr
1.75 kristaps 562: roff_if(ROFF_ARGS)
1.74 kristaps 563: {
1.77 kristaps 564: int sv;
1.74 kristaps 565:
566: /*
567: * Read ahead past the conditional.
568: * FIXME: this does not work, as conditionals don't end on
569: * whitespace, but are parsed according to a formal grammar.
570: * It's good enough for now, however.
571: */
572:
1.75 kristaps 573: while ((*bufp)[pos] && ' ' != (*bufp)[pos])
574: pos++;
1.77 kristaps 575:
576: sv = pos;
1.75 kristaps 577: while (' ' == (*bufp)[pos])
578: pos++;
1.74 kristaps 579:
1.77 kristaps 580: /*
581: * Roff is weird. If we have just white-space after the
582: * conditional, it's considered the BODY and we exit without
583: * really doing anything. Warn about this. It's probably
584: * wrong.
585: */
586:
587: if ('\0' == (*bufp)[pos] && sv != pos) {
588: if ( ! (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL))
589: return(ROFF_ERR);
590: return(ROFF_IGN);
591: }
592:
593: if ( ! roffnode_push(r, tok, ln, ppos))
594: return(ROFF_ERR);
595:
1.74 kristaps 596: /* Don't evaluate: just assume NO. */
597:
1.75 kristaps 598: r->last->endspan = 1;
599:
600: if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
601: r->last->endspan = -1;
602: pos += 2;
1.79 ! kristaps 603: }
1.74 kristaps 604:
1.77 kristaps 605: /*
606: * If there are no arguments on the line, the next-line scope is
607: * assumed.
608: */
609:
1.75 kristaps 610: if ('\0' == (*bufp)[pos])
611: return(ROFF_IGN);
1.77 kristaps 612:
613: /* Otherwise re-run the roff parser after recalculating. */
1.74 kristaps 614:
1.75 kristaps 615: *offs = pos;
616: return(ROFF_RERUN);
1.74 kristaps 617: }
CVSweb