Annotation of mandoc/roff.c, Revision 1.71
1.71 ! kristaps 1: /* $Id: roff.c,v 1.70 2010/05/15 20:51:40 kristaps Exp $ */
1.1 kristaps 2: /*
1.67 kristaps 3: * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
1.1 kristaps 4: *
5: * Permission to use, copy, modify, and distribute this software for any
1.66 kristaps 6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
1.1 kristaps 8: *
1.66 kristaps 9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.1 kristaps 16: */
1.66 kristaps 17: #ifdef HAVE_CONFIG_H
18: #include "config.h"
19: #endif
1.30 kristaps 20:
1.67 kristaps 21: #include <assert.h>
1.1 kristaps 22: #include <stdlib.h>
1.67 kristaps 23: #include <string.h>
1.1 kristaps 24:
1.67 kristaps 25: #include "mandoc.h"
1.43 kristaps 26: #include "roff.h"
1.33 kristaps 27:
/*
 * Tokens known to the roff pre-parser.  The values are used as indices
 * into the roffs[] table, so the order here must match the order of
 * the table entries.  ROFF_MAX counts the tokens and is also what the
 * lookup routines return when no token matches.
 */
enum	rofft {
	ROFF_de = 0,	/* `de' */
	ROFF_dei,	/* `dei' */
	ROFF_am,	/* `am' */
	ROFF_ami,	/* `ami' */
	ROFF_ig,	/* `ig' */
	ROFF_close,	/* `.' (closes a block, as in `..') */
	ROFF_MAX	/* number of tokens / "not found" */
};
37:
38: struct roff {
39: struct roffnode *last; /* leaf of stack */
40: mandocmsg msg; /* err/warn/fatal messages */
41: void *data; /* privdata for messages */
42: };
43:
44: struct roffnode {
45: enum rofft tok; /* type of node */
46: struct roffnode *parent; /* up one in stack */
1.71 ! kristaps 47: char *end; /* custom end-token */
1.67 kristaps 48: int line; /* parse line */
49: int col; /* parse col */
50: };
51:
/*
 * Parameter list shared by every token handler, and the matching
 * function-pointer type stored in the roffs[] table.
 */
#define	ROFF_ARGS	struct roff *r, /* parse ctx */ \
			char **bufp, /* input buffer */ \
			size_t *szp, /* size of input buffer */ \
			int ln, /* parse line */ \
			int ppos /* current pos in buffer */

typedef	enum rofferr (*roffproc)(ROFF_ARGS);
59:
60: struct roffmac {
61: const char *name; /* macro name */
62: roffproc sub; /* child of control black */
63: roffproc new; /* root of stack (type = ROFF_MAX) */
64: };
65:
/* Token handlers referenced from the roffs[] table. */
static	enum rofferr	 roff_ignore(ROFF_ARGS);
static	enum rofferr	 roff_new_close(ROFF_ARGS);
static	enum rofferr	 roff_new_ig(ROFF_ARGS);
static	enum rofferr	 roff_sub_ig(ROFF_ARGS);
70:
71: const struct roffmac roffs[ROFF_MAX] = {
72: { "de", NULL, roff_ignore },
73: { "dei", NULL, roff_ignore },
74: { "am", NULL, roff_ignore },
75: { "ami", NULL, roff_ignore },
76: { "ig", roff_sub_ig, roff_new_ig },
77: { ".", NULL, roff_new_close },
78: };
79:
/* Internal helpers: stack management, token lookup and parsing. */
static	void		 roff_free1(struct roff *r);
static	enum rofft	 roff_hash_find(const char *p);
static	int		 roffnode_push(struct roff *r,
				enum rofft tok, int line, int col);
static	void		 roffnode_pop(struct roff *r);
static	enum rofft	 roff_parse(const char *buf, int *pos);
86:
87:
88: /*
89: * Look up a roff token by its name. Returns ROFF_MAX if no macro by
90: * the nil-terminated string name could be found.
91: */
92: static enum rofft
93: roff_hash_find(const char *p)
94: {
95: int i;
96:
97: /* FIXME: make this be fast and efficient. */
98:
99: for (i = 0; i < (int)ROFF_MAX; i++)
100: if (0 == strcmp(roffs[i].name, p))
101: return((enum rofft)i);
102:
103: return(ROFF_MAX);
104: }
105:
106:
107: /*
108: * Pop the current node off of the stack of roff instructions currently
109: * pending.
110: */
111: static void
112: roffnode_pop(struct roff *r)
113: {
114: struct roffnode *p;
115:
116: if (NULL == (p = r->last))
117: return;
118: r->last = p->parent;
119: free(p);
120: }
121:
122:
123: /*
124: * Push a roff node onto the instruction stack. This must later be
125: * removed with roffnode_pop().
126: */
127: static int
128: roffnode_push(struct roff *r, enum rofft tok, int line, int col)
129: {
130: struct roffnode *p;
131:
132: if (NULL == (p = calloc(1, sizeof(struct roffnode)))) {
133: (*r->msg)(MANDOCERR_MEM, r->data, line, col, NULL);
134: return(0);
135: }
136:
137: p->tok = tok;
138: p->parent = r->last;
139: p->line = line;
140: p->col = col;
141:
142: r->last = p;
143: return(1);
144: }
145:
146:
147: static void
148: roff_free1(struct roff *r)
149: {
150:
151: while (r->last)
152: roffnode_pop(r);
153: }
154:
155:
/*
 * Drop every pending node so the context can be reused.  The message
 * callback and its private data are kept.
 */
void
roff_reset(struct roff *r)
{

	roff_free1(r);
}
162:
163:
/*
 * Destroy a context obtained from roff_alloc(): release all pending
 * nodes, then the context itself.  `r' must not be used afterwards.
 */
void
roff_free(struct roff *r)
{

	roff_free1(r);
	free(r);
}
171:
172:
173: struct roff *
174: roff_alloc(const mandocmsg msg, void *data)
175: {
176: struct roff *r;
177:
178: if (NULL == (r = calloc(1, sizeof(struct roff)))) {
179: (*msg)(MANDOCERR_MEM, data, 0, 0, NULL);
180: return(0);
181: }
182:
183: r->msg = msg;
184: r->data = data;
185: return(r);
186: }
187:
188:
189: enum rofferr
190: roff_parseln(struct roff *r, int ln, char **bufp, size_t *szp)
191: {
192: enum rofft t;
193: int ppos;
194:
195: if (NULL != r->last) {
196: /*
197: * If there's a node on the stack, then jump directly
198: * into its processing function.
199: */
200: t = r->last->tok;
201: assert(roffs[t].sub);
202: return((*roffs[t].sub)(r, bufp, szp, ln, 0));
203: } else if ('.' != (*bufp)[0] && NULL == r->last)
204: /* Return when in free text without a context. */
205: return(ROFF_CONT);
206:
207: /* There's nothing on the stack: make us anew. */
208:
209: if (ROFF_MAX == (t = roff_parse(*bufp, &ppos)))
210: return(ROFF_CONT);
211:
212: assert(roffs[t].new);
213: return((*roffs[t].new)(r, bufp, szp, ln, ppos));
214: }
215:
216:
217: /*
218: * Parse a roff node's type from the input buffer. This must be in the
219: * form of ".foo xxx" in the usual way.
220: */
221: static enum rofft
222: roff_parse(const char *buf, int *pos)
223: {
224: int j;
225: char mac[5];
226: enum rofft t;
227:
228: assert('.' == buf[0]);
229: *pos = 1;
230:
231: while (buf[*pos] && (' ' == buf[*pos] || '\t' == buf[*pos]))
232: (*pos)++;
233:
234: if ('\0' == buf[*pos])
235: return(ROFF_MAX);
236:
237: for (j = 0; j < 4; j++, (*pos)++)
238: if ('\0' == (mac[j] = buf[*pos]))
239: break;
240: else if (' ' == buf[*pos])
241: break;
242:
243: if (j == 4 || j < 1)
244: return(ROFF_MAX);
245:
246: mac[j] = '\0';
247:
248: if (ROFF_MAX == (t = roff_hash_find(mac)))
249: return(t);
250:
251: while (buf[*pos] && ' ' == buf[*pos])
252: (*pos)++;
253:
254: return(t);
255: }
256:
257:
258: /* ARGSUSED */
259: static enum rofferr
260: roff_ignore(ROFF_ARGS)
261: {
262:
263: return(ROFF_IGN);
264: }
265:
266:
267: /* ARGSUSED */
268: static enum rofferr
269: roff_sub_ig(ROFF_ARGS)
270: {
1.71 ! kristaps 271: int i, j;
1.67 kristaps 272:
273: /* Ignore free-text lines. */
274:
275: if ('.' != (*bufp)[ppos])
276: return(ROFF_IGN);
277:
1.71 ! kristaps 278: if (r->last->end) {
! 279: /*
! 280: * Allow a macro to break us, if we've defined a special
! 281: * one for the case. Old groff didn't allow spaces to
! 282: * buffer the macro, but new groff does. Whee!
! 283: */
! 284: i = ppos + 1;
! 285: while ((*bufp)[i] && ' ' == (*bufp)[i])
! 286: i++;
! 287:
! 288: if ('\0' == (*bufp)[i])
! 289: return(ROFF_IGN);
! 290:
! 291: for (j = 0; r->last->end[j]; i++, j++)
! 292: if ((*bufp)[i] != r->last->end[j])
! 293: return(ROFF_IGN);
! 294:
! 295: if (r->last->end[j])
! 296: return(ROFF_IGN);
! 297: if ((*bufp)[i] && ' ' != (*bufp)[i])
! 298: return(ROFF_IGN);
! 299:
! 300: while (' ' == (*bufp)[i])
! 301: i++;
! 302: } else if (ROFF_close != roff_parse(*bufp, &i))
! 303: return(ROFF_IGN);
! 304:
! 305: /*
! 306: * Pop off the ignoring context and warn if we're going to lose
! 307: * any of our remaining arguments.
! 308: */
1.67 kristaps 309:
1.71 ! kristaps 310: roffnode_pop(r);
! 311:
! 312: if ('\0' == (*bufp)[i])
1.67 kristaps 313: return(ROFF_IGN);
1.71 ! kristaps 314: if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, i, NULL))
! 315: return(ROFF_ERR);
1.67 kristaps 316:
317: return(ROFF_IGN);
318: }
319:
320:
321: /* ARGSUSED */
322: static enum rofferr
323: roff_new_close(ROFF_ARGS)
324: {
325:
1.68 kristaps 326: /*
1.67 kristaps 327: if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
328: return(ROFF_ERR);
1.68 kristaps 329: */
1.69 kristaps 330: return(ROFF_IGN);
1.67 kristaps 331: }
332:
333:
334: /* ARGSUSED */
335: static enum rofferr
336: roff_new_ig(ROFF_ARGS)
337: {
1.71 ! kristaps 338: int i;
1.67 kristaps 339:
1.71 ! kristaps 340: if ( ! roffnode_push(r, ROFF_ig, ln, ppos))
! 341: return(ROFF_ERR);
! 342:
! 343: i = (int)ppos;
! 344: while ((*bufp)[i] && ' ' != (*bufp)[i])
! 345: i++;
! 346:
! 347: if (i == (int)ppos)
! 348: return(ROFF_IGN);
! 349: if ((*bufp)[i])
! 350: if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, i, NULL))
! 351: return(ROFF_ERR);
! 352:
! 353: /*
! 354: * If `.ig' has arguments, the first argument (up to the next
! 355: * whitespace) is interpreted as an argument marking the macro
! 356: * close. Thus, `.ig foo' will close at `.foo'.
! 357: *
! 358: * NOTE: the closing macro `.foo' in the above case is not
! 359: * allowed to have leading spaces with old groff! Thus `.foo'
! 360: * != `. foo'. Oh yeah, everything after the `.foo' is lost.
! 361: * Merry fucking Christmas.
! 362: */
! 363:
! 364: r->last->end = malloc((size_t)i - ppos + 1);
! 365: if (NULL == r->last->end) {
! 366: (*r->msg)(MANDOCERR_MEM, r->data, ln, ppos, NULL);
! 367: return(ROFF_ERR);
! 368: }
! 369:
! 370: memcpy(r->last->end, &(*bufp)[ppos], (size_t)i - ppos);
! 371: r->last->end[(size_t)i - ppos] = '\0';
! 372:
! 373: return(ROFF_IGN);
1.67 kristaps 374: }
375:
376:
377: int
378: roff_endparse(struct roff *r)
379: {
380:
381: if (NULL == r->last)
382: return(1);
383: return((*r->msg)(MANDOCERR_SCOPEEXIT, r->data,
384: r->last->line, r->last->col, NULL));
385: }
CVSweb