Annotation of mandoc/roff.c, Revision 1.78
1.78 ! kristaps 1: /* $Id: roff.c,v 1.77 2010/05/16 19:08:11 kristaps Exp $ */
1.1 kristaps 2: /*
1.67 kristaps 3: * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
1.1 kristaps 4: *
5: * Permission to use, copy, modify, and distribute this software for any
1.66 kristaps 6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
1.1 kristaps 8: *
1.66 kristaps 9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.1 kristaps 16: */
1.66 kristaps 17: #ifdef HAVE_CONFIG_H
18: #include "config.h"
19: #endif
1.30 kristaps 20:
1.67 kristaps 21: #include <assert.h>
1.1 kristaps 22: #include <stdlib.h>
1.67 kristaps 23: #include <string.h>
1.75 kristaps 24: #include <stdio.h>
1.1 kristaps 25:
1.67 kristaps 26: #include "mandoc.h"
1.43 kristaps 27: #include "roff.h"
1.33 kristaps 28:
/*
 * True when `c' is one of the two roff control characters: the period
 * or the apostrophe.  Note that `c' is evaluated up to two times.
 */
#define	ROFF_CTL(c) \
	('.' == (c) || '\'' == (c))

/*
 * Parser tracing.  Flip the "#if 0" to have every scope change printed
 * on stderr together with the name, line and column of the node on top
 * of the stack; when disabled the macro expands to an empty loop, so it
 * can still be followed by a semicolon like a statement.
 */
#if 0
#define	ROFF_MDEBUG(p, str) \
	fprintf(stderr, "%s: %s (%d:%d)\n", (str), \
		roffs[(p)->last->tok].name, \
		(p)->last->line, (p)->last->col)
#else
#define	ROFF_MDEBUG(p, str) while (/* CONSTCOND */ 0)
#endif
39:
/*
 * Tokens understood by the roff pre-parser.  The values double as
 * indices into the roffs[] table, so the order of both must agree.
 */
enum	rofft {
	ROFF_if,	/* "if" conditional */
	ROFF_ig,	/* "ig" ignore block */
	ROFF_cblock,	/* "." closing an ignore block */
	ROFF_ccond,	/* closing brace escape of a conditional */
#if 0
	ROFF_am,
	ROFF_ami,
	ROFF_de,
	ROFF_dei,
	ROFF_close,
#endif
	ROFF_MAX	/* token count; also means "no such token" */
};
54:
55: struct roff {
56: struct roffnode *last; /* leaf of stack */
57: mandocmsg msg; /* err/warn/fatal messages */
58: void *data; /* privdata for messages */
59: };
60:
61: struct roffnode {
62: enum rofft tok; /* type of node */
63: struct roffnode *parent; /* up one in stack */
1.74 kristaps 64: char *end; /* end-token: custom */
1.67 kristaps 65: int line; /* parse line */
66: int col; /* parse col */
1.75 kristaps 67: int endspan;
1.67 kristaps 68: };
69:
/*
 * Parameter list shared by every macro and text handler.
 */
#define	ROFF_ARGS \
	struct roff *r,	/* parse ctx */ \
	enum rofft tok,	/* tok of macro */ \
	char **bufp,	/* input buffer */ \
	size_t *szp,	/* size of input buffer */ \
	int ln,		/* parse line */ \
	int ppos,	/* original pos in buffer */ \
	int pos,	/* current pos in buffer */ \
	int *offs	/* reset offset of buffer data */
1.67 kristaps 78:
/* Signature of a macro or text handler stored in the roffs[] table. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
80:
81: struct roffmac {
82: const char *name; /* macro name */
1.75 kristaps 83: roffproc proc;
1.78 ! kristaps 84: roffproc text;
1.67 kristaps 85: };
86:
/* Macro and text handlers referenced from the roffs[] table. */
static	enum rofferr	 roff_if(ROFF_ARGS);
static	enum rofferr	 roff_if_text(ROFF_ARGS);
static	enum rofferr	 roff_ig(ROFF_ARGS);
static	enum rofferr	 roff_ig_text(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);
1.67 kristaps 93:
94: const struct roffmac roffs[ROFF_MAX] = {
1.78 ! kristaps 95: { "if", roff_if, roff_if_text },
! 96: { "ig", roff_ig, roff_ig_text },
! 97: { ".", roff_cblock, NULL },
! 98: { "\\}", roff_ccond, NULL },
1.67 kristaps 99: };
100:
/* File-local helpers: stack maintenance and token lookup. */
static	void		 roff_free1(struct roff *);
static	enum rofft	 roff_hash_find(const char *);
static	void		 roffnode_cleanscope(struct roff *);
static	int		 roffnode_push(struct roff *,
				enum rofft, int, int);
static	void		 roffnode_pop(struct roff *);
static	enum rofft	 roff_parse(const char *, int *);
108:
109:
110: /*
111: * Look up a roff token by its name. Returns ROFF_MAX if no macro by
112: * the nil-terminated string name could be found.
113: */
114: static enum rofft
115: roff_hash_find(const char *p)
116: {
117: int i;
118:
119: /* FIXME: make this be fast and efficient. */
120:
121: for (i = 0; i < (int)ROFF_MAX; i++)
122: if (0 == strcmp(roffs[i].name, p))
123: return((enum rofft)i);
124:
125: return(ROFF_MAX);
126: }
127:
128:
129: /*
130: * Pop the current node off of the stack of roff instructions currently
131: * pending.
132: */
133: static void
134: roffnode_pop(struct roff *r)
135: {
136: struct roffnode *p;
137:
1.75 kristaps 138: assert(r->last);
139: p = r->last;
140: r->last = r->last->parent;
1.74 kristaps 141: if (p->end)
142: free(p->end);
1.67 kristaps 143: free(p);
144: }
145:
146:
147: /*
148: * Push a roff node onto the instruction stack. This must later be
149: * removed with roffnode_pop().
150: */
151: static int
152: roffnode_push(struct roff *r, enum rofft tok, int line, int col)
153: {
154: struct roffnode *p;
155:
156: if (NULL == (p = calloc(1, sizeof(struct roffnode)))) {
157: (*r->msg)(MANDOCERR_MEM, r->data, line, col, NULL);
158: return(0);
159: }
160:
161: p->tok = tok;
162: p->parent = r->last;
163: p->line = line;
164: p->col = col;
165:
166: r->last = p;
167: return(1);
168: }
169:
170:
171: static void
172: roff_free1(struct roff *r)
173: {
174:
175: while (r->last)
176: roffnode_pop(r);
177: }
178:
179:
/*
 * Discard all open scopes while keeping the parser context itself, so
 * the same context can be used again.
 */
void
roff_reset(struct roff *r)
{

	roff_free1(r);
}
186:
187:
/*
 * Destroy a parser context: first every open scope, then the context
 * structure.  `r' must be a valid (non-NULL) context.
 */
void
roff_free(struct roff *r)
{

	roff_free1(r);
	free(r);
}
195:
196:
197: struct roff *
198: roff_alloc(const mandocmsg msg, void *data)
199: {
200: struct roff *r;
201:
202: if (NULL == (r = calloc(1, sizeof(struct roff)))) {
203: (*msg)(MANDOCERR_MEM, data, 0, 0, NULL);
204: return(0);
205: }
206:
207: r->msg = msg;
208: r->data = data;
209: return(r);
210: }
211:
212:
213: enum rofferr
1.74 kristaps 214: roff_parseln(struct roff *r, int ln,
215: char **bufp, size_t *szp, int pos, int *offs)
1.67 kristaps 216: {
217: enum rofft t;
1.78 ! kristaps 218: int ppos, i, j, wtf;
1.74 kristaps 219:
1.75 kristaps 220: if (r->last && ! ROFF_CTL((*bufp)[pos])) {
1.78 ! kristaps 221: /*
! 222: * If a scope is open and we're not a macro, pass it
! 223: * through our text detector and continue as quickly as
! 224: * possible.
! 225: */
! 226: t = r->last->tok;
! 227: assert(roffs[t].text);
! 228: return((*roffs[t].text)
! 229: (r, t, bufp, szp, ln, pos, pos, offs));
1.75 kristaps 230: } else if ( ! ROFF_CTL((*bufp)[pos]))
1.78 ! kristaps 231: /*
! 232: * Don't do anything if we're free-form text.
! 233: */
1.67 kristaps 234: return(ROFF_CONT);
235:
1.78 ! kristaps 236: /* A macro-ish line with a possibly-open macro context. */
! 237:
! 238: wtf = 0;
! 239:
! 240: if (r->last && r->last->end) {
! 241: /*
! 242: * We have a scope open that has a custom end-macro
! 243: * handler. Try to match it against the input.
! 244: */
! 245: i = pos + 1;
! 246: while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
! 247: i++;
! 248:
! 249: for (j = 0; r->last->end[j]; j++, i++)
! 250: if ((*bufp)[i] != r->last->end[j])
! 251: break;
! 252:
! 253: if ('\0' == r->last->end[j] &&
! 254: ('\0' == (*bufp)[i] ||
! 255: ' ' == (*bufp)[i] ||
! 256: '\t' == (*bufp)[i])) {
! 257: roffnode_pop(r);
! 258: roffnode_cleanscope(r);
! 259: wtf = 1;
! 260: }
! 261: }
1.67 kristaps 262:
1.75 kristaps 263: ppos = pos;
1.76 kristaps 264: if (ROFF_MAX == (t = roff_parse(*bufp, &pos))) {
1.78 ! kristaps 265: /*
! 266: * This is some of groff's stranger behaviours. If we
! 267: * encountered a custom end-scope tag and that tag also
! 268: * happens to be a "real" macro, then we need to try
! 269: * interpreting it again as a real macro. If it's not,
! 270: * then return ignore. Else continue.
! 271: */
! 272: if (wtf)
1.76 kristaps 273: return(ROFF_IGN);
1.78 ! kristaps 274: else if (NULL == r->last)
! 275: return(ROFF_CONT);
! 276:
! 277: /* FIXME: this assumes that we ignore!? */
! 278: return(ROFF_IGN);
1.76 kristaps 279: }
1.67 kristaps 280:
1.75 kristaps 281: assert(roffs[t].proc);
1.78 ! kristaps 282: return((*roffs[t].proc)
! 283: (r, t, bufp, szp, ln, ppos, pos, offs));
1.74 kristaps 284: }
285:
286:
287: int
288: roff_endparse(struct roff *r)
289: {
290:
291: if (NULL == r->last)
292: return(1);
293: return((*r->msg)(MANDOCERR_SCOPEEXIT, r->data, r->last->line,
294: r->last->col, NULL));
1.67 kristaps 295: }
296:
297:
298: /*
299: * Parse a roff node's type from the input buffer. This must be in the
300: * form of ".foo xxx" in the usual way.
301: */
302: static enum rofft
303: roff_parse(const char *buf, int *pos)
304: {
305: int j;
306: char mac[5];
307: enum rofft t;
308:
1.75 kristaps 309: assert(ROFF_CTL(buf[*pos]));
310: (*pos)++;
1.67 kristaps 311:
312: while (buf[*pos] && (' ' == buf[*pos] || '\t' == buf[*pos]))
313: (*pos)++;
314:
315: if ('\0' == buf[*pos])
316: return(ROFF_MAX);
317:
318: for (j = 0; j < 4; j++, (*pos)++)
319: if ('\0' == (mac[j] = buf[*pos]))
320: break;
321: else if (' ' == buf[*pos])
322: break;
323:
324: if (j == 4 || j < 1)
325: return(ROFF_MAX);
326:
327: mac[j] = '\0';
328:
329: if (ROFF_MAX == (t = roff_hash_find(mac)))
330: return(t);
331:
332: while (buf[*pos] && ' ' == buf[*pos])
333: (*pos)++;
334:
335: return(t);
336: }
337:
338:
339: /* ARGSUSED */
340: static enum rofferr
1.76 kristaps 341: roff_cblock(ROFF_ARGS)
1.67 kristaps 342: {
343:
1.76 kristaps 344: if (NULL == r->last) {
345: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
346: return(ROFF_ERR);
347: return(ROFF_IGN);
348: }
1.67 kristaps 349:
1.76 kristaps 350: if (ROFF_ig != r->last->tok) {
351: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
352: return(ROFF_ERR);
1.67 kristaps 353: return(ROFF_IGN);
1.76 kristaps 354: }
1.67 kristaps 355:
1.76 kristaps 356: if ((*bufp)[pos])
357: if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
358: return(ROFF_ERR);
1.71 kristaps 359:
1.76 kristaps 360: ROFF_MDEBUG(r, "closing ignore block");
1.71 kristaps 361: roffnode_pop(r);
1.76 kristaps 362: roffnode_cleanscope(r);
363: return(ROFF_IGN);
1.71 kristaps 364:
1.67 kristaps 365: }
366:
367:
1.76 kristaps 368: static void
369: roffnode_cleanscope(struct roff *r)
1.67 kristaps 370: {
371:
1.76 kristaps 372: while (r->last) {
373: if (--r->last->endspan < 0)
374: break;
375: ROFF_MDEBUG(r, "closing implicit scope");
376: roffnode_pop(r);
377: }
1.67 kristaps 378: }
379:
380:
1.75 kristaps 381: /* ARGSUSED */
1.74 kristaps 382: static enum rofferr
1.75 kristaps 383: roff_ccond(ROFF_ARGS)
1.74 kristaps 384: {
385:
1.76 kristaps 386: if (NULL == r->last) {
387: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
388: return(ROFF_ERR);
389: return(ROFF_IGN);
390: }
391:
392: if (ROFF_if != r->last->tok) {
1.75 kristaps 393: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
394: return(ROFF_ERR);
395: return(ROFF_IGN);
396: }
397:
1.76 kristaps 398: if (r->last->endspan > -1) {
399: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
400: return(ROFF_ERR);
401: return(ROFF_IGN);
402: }
403:
404: if ((*bufp)[pos])
405: if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
406: return(ROFF_ERR);
407:
1.75 kristaps 408: ROFF_MDEBUG(r, "closing explicit scope");
409: roffnode_pop(r);
1.76 kristaps 410: roffnode_cleanscope(r);
411: return(ROFF_IGN);
412: }
413:
1.75 kristaps 414:
1.76 kristaps 415: /* ARGSUSED */
416: static enum rofferr
417: roff_ig(ROFF_ARGS)
418: {
1.78 ! kristaps 419: int sv;
! 420: size_t sz;
1.76 kristaps 421:
422: if ( ! roffnode_push(r, tok, ln, ppos))
423: return(ROFF_ERR);
424:
1.78 ! kristaps 425: if ('\0' == (*bufp)[pos]) {
! 426: ROFF_MDEBUG(r, "opening ignore block");
! 427: return(ROFF_IGN);
! 428: }
! 429:
! 430: sv = pos;
! 431: while ((*bufp)[pos] && ' ' != (*bufp)[pos] &&
! 432: '\t' != (*bufp)[pos])
! 433: pos++;
! 434:
! 435: /*
! 436: * Note: groff does NOT like escape characters in the input.
! 437: * Instead of detecting this, we're just going to let it fly and
! 438: * to hell with it.
! 439: */
! 440:
! 441: assert(pos > sv);
! 442: sz = (size_t)(pos - sv);
! 443:
! 444: r->last->end = malloc(sz + 1);
! 445:
! 446: if (NULL == r->last->end) {
! 447: (*r->msg)(MANDOCERR_MEM, r->data, ln, pos, NULL);
! 448: return(ROFF_ERR);
! 449: }
! 450:
! 451: memcpy(r->last->end, *bufp + sv, sz);
! 452: r->last->end[(int)sz] = '\0';
! 453:
! 454: ROFF_MDEBUG(r, "opening explicit ignore block");
1.74 kristaps 455:
1.77 kristaps 456: if ((*bufp)[pos])
457: if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
458: return(ROFF_ERR);
1.74 kristaps 459:
1.78 ! kristaps 460: return(ROFF_IGN);
! 461: }
! 462:
! 463:
! 464: /* ARGSUSED */
! 465: static enum rofferr
! 466: roff_ig_text(ROFF_ARGS)
! 467: {
! 468:
! 469: return(ROFF_IGN);
! 470: }
! 471:
! 472:
! 473: /* ARGSUSED */
! 474: static enum rofferr
! 475: roff_if_text(ROFF_ARGS)
! 476: {
! 477: char *ep, *st;
! 478:
! 479: st = &(*bufp)[pos];
! 480: if (NULL == (ep = strstr(st, "\\}"))) {
! 481: roffnode_cleanscope(r);
! 482: return(ROFF_IGN);
! 483: }
! 484:
! 485: if (ep > st && '\\' != *(ep - 1)) {
! 486: ROFF_MDEBUG(r, "closing explicit scope (in-line)");
! 487: roffnode_pop(r);
! 488: }
! 489:
! 490: roffnode_cleanscope(r);
1.74 kristaps 491: return(ROFF_IGN);
492: }
493:
494:
1.75 kristaps 495: /* ARGSUSED */
1.74 kristaps 496: static enum rofferr
1.75 kristaps 497: roff_if(ROFF_ARGS)
1.74 kristaps 498: {
1.77 kristaps 499: int sv;
1.74 kristaps 500:
501: /*
502: * Read ahead past the conditional.
503: * FIXME: this does not work, as conditionals don't end on
504: * whitespace, but are parsed according to a formal grammar.
505: * It's good enough for now, however.
506: */
507:
1.75 kristaps 508: while ((*bufp)[pos] && ' ' != (*bufp)[pos])
509: pos++;
1.77 kristaps 510:
511: sv = pos;
1.75 kristaps 512: while (' ' == (*bufp)[pos])
513: pos++;
1.74 kristaps 514:
1.77 kristaps 515: /*
516: * Roff is weird. If we have just white-space after the
517: * conditional, it's considered the BODY and we exit without
518: * really doing anything. Warn about this. It's probably
519: * wrong.
520: */
521:
522: if ('\0' == (*bufp)[pos] && sv != pos) {
523: if ( ! (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL))
524: return(ROFF_ERR);
525: return(ROFF_IGN);
526: }
527:
528: if ( ! roffnode_push(r, tok, ln, ppos))
529: return(ROFF_ERR);
530:
1.74 kristaps 531: /* Don't evaluate: just assume NO. */
532:
1.75 kristaps 533: r->last->endspan = 1;
534:
535: if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
536: ROFF_MDEBUG(r, "opening explicit scope");
537: r->last->endspan = -1;
538: pos += 2;
539: } else
540: ROFF_MDEBUG(r, "opening implicit scope");
1.74 kristaps 541:
1.77 kristaps 542: /*
543: * If there are no arguments on the line, the next-line scope is
544: * assumed.
545: */
546:
1.75 kristaps 547: if ('\0' == (*bufp)[pos])
548: return(ROFF_IGN);
1.77 kristaps 549:
550: /* Otherwise re-run the roff parser after recalculating. */
1.74 kristaps 551:
1.75 kristaps 552: *offs = pos;
553: return(ROFF_RERUN);
1.74 kristaps 554: }
CVSweb