Annotation of mandoc/roff.c, Revision 1.167
1.167 ! kristaps 1: /* $Id: roff.c,v 1.166 2011/07/29 09:19:48 kristaps Exp $ */
1.1 kristaps 2: /*
1.119 schwarze 3: * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4: * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
1.1 kristaps 5: *
6: * Permission to use, copy, modify, and distribute this software for any
1.66 kristaps 7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
1.1 kristaps 9: *
1.106 kristaps 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.66 kristaps 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.106 kristaps 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.66 kristaps 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.1 kristaps 17: */
1.66 kristaps 18: #ifdef HAVE_CONFIG_H
19: #include "config.h"
20: #endif
1.30 kristaps 21:
1.67 kristaps 22: #include <assert.h>
1.85 kristaps 23: #include <ctype.h>
1.1 kristaps 24: #include <stdlib.h>
1.67 kristaps 25: #include <string.h>
1.1 kristaps 26:
1.67 kristaps 27: #include "mandoc.h"
1.109 kristaps 28: #include "libroff.h"
1.94 kristaps 29: #include "libmandoc.h"
1.33 kristaps 30:
1.141 kristaps 31: /* Maximum number of nested if-else conditionals. */
1.82 kristaps 32: #define RSTACK_MAX 128
33:
/*
 * Tokens for the roff requests handled in this file.
 * A token doubles as an index into the roffs[] table, so the
 * enumerators must stay in the same order as the table entries.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,	/* the `.' entry of roffs[] */
	ROFF_ccond,	/* the `\}' entry of roffs[] */
	ROFF_USERDEF,	/* nameless entry dispatching to roff_userdef() */
	ROFF_MAX	/* number of tokens; also the "not found" value */
};
68:
/* Evaluation rule attached to a node and to the conditional stack. */
enum	roffrule {
	ROFFRULE_ALLOW,
	ROFFRULE_DENY
};
73:
/*
 * One register.  While "set" is zero the register has not been
 * assigned and its per-register default applies instead of "u".
 * For now, register values are assumed to be unsigned integers.
 */
struct	reg {
	int		 set;	/* non-zero once a value was assigned */
	unsigned int	 u;	/* the assigned value */
};
83:
/*
 * Minimal string buffer: a nil-terminated character array together
 * with its cached length.
 */
struct	roffstr {
	char		*p;	/* nil-terminated buffer */
	size_t		 sz;	/* cached strlen(p) */
};
91:
92: /*
1.167 ! kristaps 93: * A key-value roffstr pair as part of a singly-linked list.
1.166 kristaps 94: */
95: struct roffkv {
96: struct roffstr key;
97: struct roffstr val;
98: struct roffkv *next; /* next in list */
1.94 kristaps 99: };
100:
1.67 kristaps 101: struct roff {
1.128 kristaps 102: struct mparse *parse; /* parse point */
1.67 kristaps 103: struct roffnode *last; /* leaf of stack */
1.82 kristaps 104: enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */
105: int rstackpos; /* position in rstack */
1.147 kristaps 106: struct reg regs[REG__MAX];
1.166 kristaps 107: struct roffkv *strtab; /* user-defined strings & macros */
1.167 ! kristaps 108: struct roffkv *xmbtab; /* multi-byte trans table (`tr') */
! 109: struct roffstr *xtab; /* single-byte trans table (`tr') */
1.106 kristaps 110: const char *current_string; /* value of last called user macro */
1.118 kristaps 111: struct tbl_node *first_tbl; /* first table parsed */
112: struct tbl_node *last_tbl; /* last table parsed */
113: struct tbl_node *tbl; /* current table being parsed */
1.125 kristaps 114: struct eqn_node *last_eqn; /* last equation parsed */
115: struct eqn_node *first_eqn; /* first equation parsed */
116: struct eqn_node *eqn; /* current equation being parsed */
1.79 kristaps 117: };
118:
1.67 kristaps 119: struct roffnode {
120: enum rofft tok; /* type of node */
121: struct roffnode *parent; /* up one in stack */
122: int line; /* parse line */
123: int col; /* parse col */
1.106 kristaps 124: char *name; /* node name, e.g. macro name */
1.79 kristaps 125: char *end; /* end-rules: custom token */
126: int endspan; /* end-rules: next-line or infty */
1.82 kristaps 127: enum roffrule rule; /* current evaluation rule */
1.67 kristaps 128: };
129:
/*
 * Parameter list shared by every request handler:
 *   r     parse context
 *   tok   token of the request being handled
 *   bufp  input buffer
 *   szp   size of the input buffer
 *   ln    parse line
 *   ppos  original position in the buffer
 *   pos   current position in the buffer
 *   offs  reset offset of buffer data
 */
#define	ROFF_ARGS	 struct roff *r, \
			 enum rofft tok, \
			 char **bufp, \
			 size_t *szp, \
			 int ln, \
			 int ppos, \
			 int pos, \
			 int *offs

/* Signature of a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
140:
141: struct roffmac {
142: const char *name; /* macro name */
1.79 kristaps 143: roffproc proc; /* process new macro */
144: roffproc text; /* process as child text of macro */
145: roffproc sub; /* process as child of macro */
146: int flags;
147: #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
1.85 kristaps 148: struct roffmac *next;
1.67 kristaps 149: };
150:
/* A predefined string: the input name and what it is replaced with. */
struct	predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};

/* Expands to one struct predef initialiser, trailing comma included. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
158:
/* Request name lookup. */
static	void		 roffhash_init(void);
static	enum rofft	 roffhash_find(const char *, size_t);

/* Stack of open instructions. */
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_cleanscope(struct roff *);

/* Parser internals and string handling. */
static	enum roffrule	 roff_evalcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freestr(struct roffkv *);
static	char		*roff_getname(struct roff *, char **, int, int);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	void		 roff_openeqn(struct roff *, const char *,
				int, int, const char *);
static	enum rofft	 roff_parse(struct roff *, const char *, int *);
static	enum rofferr	 roff_parsetext(char *);
static	void		 roff_res(struct roff *,
				char **, size_t *, int, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);

/* Request handlers; all share the ROFF_ARGS parameter list. */
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
1.67 kristaps 201:
/*
 * Bucket layout of the request hash table, see roffhash_find():
 * one bucket for every character code from ASCII_LO to ASCII_HI
 * inclusive.
 */
#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* Bucket heads; entries in a bucket are chained via roffmac.next. */
static	struct roffmac	*hash[HASHWIDTH];
209:
210: static struct roffmac roffs[ROFF_MAX] = {
1.103 kristaps 211: { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
1.85 kristaps 212: { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
213: { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
214: { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
215: { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
216: { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
217: { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
1.92 schwarze 218: { "ds", roff_ds, NULL, NULL, 0, NULL },
1.85 kristaps 219: { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
1.103 kristaps 220: { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
1.85 kristaps 221: { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
222: { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
223: { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
1.123 schwarze 224: { "it", roff_line_ignore, NULL, NULL, 0, NULL },
1.103 kristaps 225: { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
226: { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
1.104 kristaps 227: { "nr", roff_nr, NULL, NULL, 0, NULL },
1.124 schwarze 228: { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
229: { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
1.122 schwarze 230: { "rm", roff_rm, NULL, NULL, 0, NULL },
1.105 kristaps 231: { "so", roff_so, NULL, NULL, 0, NULL },
1.124 schwarze 232: { "ta", roff_line_ignore, NULL, NULL, 0, NULL },
1.164 kristaps 233: { "tr", roff_tr, NULL, NULL, 0, NULL },
1.109 kristaps 234: { "TS", roff_TS, NULL, NULL, 0, NULL },
235: { "TE", roff_TE, NULL, NULL, 0, NULL },
1.112 kristaps 236: { "T&", roff_T_, NULL, NULL, 0, NULL },
1.125 kristaps 237: { "EQ", roff_EQ, NULL, NULL, 0, NULL },
238: { "EN", roff_EN, NULL, NULL, 0, NULL },
1.85 kristaps 239: { ".", roff_cblock, NULL, NULL, 0, NULL },
240: { "\\}", roff_ccond, NULL, NULL, 0, NULL },
1.106 kristaps 241: { NULL, roff_userdef, NULL, NULL, 0, NULL },
1.67 kristaps 242: };
243:
1.141 kristaps 244: /* Array of injected predefined strings. */
245: #define PREDEFS_MAX 38
246: static const struct predef predefs[PREDEFS_MAX] = {
247: #include "predefs.in"
248: };
249:
/*
 * Bucket index for the string p: the code of its first character
 * relative to ASCII_LO.  See roffhash_find().
 * The argument is parenthesised so that expressions such as
 * ROFF_HASH(s + 1) expand correctly.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
252:
253: static void
1.155 kristaps 254: roffhash_init(void)
1.85 kristaps 255: {
256: struct roffmac *n;
257: int buc, i;
258:
1.106 kristaps 259: for (i = 0; i < (int)ROFF_USERDEF; i++) {
1.85 kristaps 260: assert(roffs[i].name[0] >= ASCII_LO);
261: assert(roffs[i].name[0] <= ASCII_HI);
262:
263: buc = ROFF_HASH(roffs[i].name);
264:
265: if (NULL != (n = hash[buc])) {
266: for ( ; n->next; n = n->next)
267: /* Do nothing. */ ;
268: n->next = &roffs[i];
269: } else
270: hash[buc] = &roffs[i];
271: }
272: }
273:
1.67 kristaps 274: /*
275: * Look up a roff token by its name. Returns ROFF_MAX if no macro by
276: * the nil-terminated string name could be found.
277: */
278: static enum rofft
1.155 kristaps 279: roffhash_find(const char *p, size_t s)
1.67 kristaps 280: {
1.85 kristaps 281: int buc;
282: struct roffmac *n;
1.67 kristaps 283:
1.85 kristaps 284: /*
285: * libroff has an extremely simple hashtable, for the time
286: * being, which simply keys on the first character, which must
287: * be printable, then walks a chain. It works well enough until
288: * optimised.
289: */
290:
291: if (p[0] < ASCII_LO || p[0] > ASCII_HI)
292: return(ROFF_MAX);
293:
294: buc = ROFF_HASH(p);
295:
296: if (NULL == (n = hash[buc]))
297: return(ROFF_MAX);
298: for ( ; n; n = n->next)
1.106 kristaps 299: if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
1.85 kristaps 300: return((enum rofft)(n - roffs));
1.67 kristaps 301:
302: return(ROFF_MAX);
303: }
304:
305:
306: /*
307: * Pop the current node off of the stack of roff instructions currently
308: * pending.
309: */
310: static void
311: roffnode_pop(struct roff *r)
312: {
313: struct roffnode *p;
314:
1.75 kristaps 315: assert(r->last);
316: p = r->last;
1.82 kristaps 317:
1.75 kristaps 318: r->last = r->last->parent;
1.106 kristaps 319: free(p->name);
320: free(p->end);
1.67 kristaps 321: free(p);
322: }
323:
324:
325: /*
326: * Push a roff node onto the instruction stack. This must later be
327: * removed with roffnode_pop().
328: */
1.98 schwarze 329: static void
1.106 kristaps 330: roffnode_push(struct roff *r, enum rofft tok, const char *name,
331: int line, int col)
1.67 kristaps 332: {
333: struct roffnode *p;
334:
1.98 schwarze 335: p = mandoc_calloc(1, sizeof(struct roffnode));
1.67 kristaps 336: p->tok = tok;
1.106 kristaps 337: if (name)
338: p->name = mandoc_strdup(name);
1.67 kristaps 339: p->parent = r->last;
340: p->line = line;
341: p->col = col;
1.79 kristaps 342: p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
1.67 kristaps 343:
344: r->last = p;
345: }
346:
347:
348: static void
349: roff_free1(struct roff *r)
350: {
1.118 kristaps 351: struct tbl_node *t;
1.125 kristaps 352: struct eqn_node *e;
1.167 ! kristaps 353: int i;
1.67 kristaps 354:
1.125 kristaps 355: while (NULL != (t = r->first_tbl)) {
1.113 kristaps 356: r->first_tbl = t->next;
357: tbl_free(t);
1.109 kristaps 358: }
359:
1.113 kristaps 360: r->first_tbl = r->last_tbl = r->tbl = NULL;
361:
1.125 kristaps 362: while (NULL != (e = r->first_eqn)) {
363: r->first_eqn = e->next;
364: eqn_free(e);
365: }
366:
367: r->first_eqn = r->last_eqn = r->eqn = NULL;
368:
1.67 kristaps 369: while (r->last)
370: roffnode_pop(r);
1.109 kristaps 371:
1.167 ! kristaps 372: roff_freestr(r->strtab);
! 373: roff_freestr(r->xmbtab);
! 374:
! 375: r->strtab = r->xmbtab = NULL;
! 376:
! 377: if (r->xtab)
! 378: for (i = 0; i < 128; i++)
! 379: free(r->xtab[i].p);
! 380:
! 381: free(r->xtab);
! 382: r->xtab = NULL;
1.67 kristaps 383: }
384:
385: void
386: roff_reset(struct roff *r)
387: {
1.143 kristaps 388: int i;
1.67 kristaps 389:
390: roff_free1(r);
1.143 kristaps 391:
1.147 kristaps 392: memset(&r->regs, 0, sizeof(struct reg) * REG__MAX);
393:
1.143 kristaps 394: for (i = 0; i < PREDEFS_MAX; i++)
395: roff_setstr(r, predefs[i].name, predefs[i].str, 0);
1.67 kristaps 396: }
397:
398:
/*
 * Destroy a parser: release all of its data and then the parser
 * structure itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
406:
407:
408: struct roff *
1.147 kristaps 409: roff_alloc(struct mparse *parse)
1.67 kristaps 410: {
411: struct roff *r;
1.141 kristaps 412: int i;
1.67 kristaps 413:
1.98 schwarze 414: r = mandoc_calloc(1, sizeof(struct roff));
1.128 kristaps 415: r->parse = parse;
1.82 kristaps 416: r->rstackpos = -1;
1.85 kristaps 417:
1.155 kristaps 418: roffhash_init();
1.141 kristaps 419:
420: for (i = 0; i < PREDEFS_MAX; i++)
421: roff_setstr(r, predefs[i].name, predefs[i].str, 0);
422:
1.67 kristaps 423: return(r);
424: }
425:
1.94 kristaps 426: /*
427: * Pre-filter each and every line for reserved words (one beginning with
428: * `\*', e.g., `\*(ab'). These must be handled before the actual line
429: * is processed.
1.153 kristaps 430: * This also checks the syntax of regular escapes.
1.154 kristaps 431: */
432: static void
1.142 kristaps 433: roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
1.94 kristaps 434: {
1.152 kristaps 435: enum mandoc_esc esc;
1.108 schwarze 436: const char *stesc; /* start of an escape sequence ('\\') */
437: const char *stnam; /* start of the name, after "[(*" */
438: const char *cp; /* end of the name, e.g. before ']' */
439: const char *res; /* the string to be substituted */
1.94 kristaps 440: int i, maxl;
441: size_t nsz;
442: char *n;
443:
1.154 kristaps 444: again:
1.108 schwarze 445: cp = *bufp + pos;
446: while (NULL != (cp = strchr(cp, '\\'))) {
447: stesc = cp++;
448:
449: /*
450: * The second character must be an asterisk.
451: * If it isn't, skip it anyway: It is escaped,
452: * so it can't start another escape sequence.
453: */
454:
455: if ('\0' == *cp)
1.154 kristaps 456: return;
1.152 kristaps 457:
458: if ('*' != *cp) {
459: res = cp;
460: esc = mandoc_escape(&cp, NULL, NULL);
461: if (ESCAPE_ERROR != esc)
462: continue;
463: cp = res;
1.153 kristaps 464: mandoc_msg
465: (MANDOCERR_BADESCAPE, r->parse,
466: ln, (int)(stesc - *bufp), NULL);
1.154 kristaps 467: return;
1.152 kristaps 468: }
469:
470: cp++;
1.108 schwarze 471:
472: /*
473: * The third character decides the length
474: * of the name of the string.
475: * Save a pointer to the name.
476: */
477:
1.94 kristaps 478: switch (*cp) {
1.108 schwarze 479: case ('\0'):
1.154 kristaps 480: return;
1.94 kristaps 481: case ('('):
482: cp++;
483: maxl = 2;
484: break;
485: case ('['):
486: cp++;
487: maxl = 0;
488: break;
489: default:
490: maxl = 1;
491: break;
492: }
1.108 schwarze 493: stnam = cp;
1.94 kristaps 494:
1.108 schwarze 495: /* Advance to the end of the name. */
1.94 kristaps 496:
497: for (i = 0; 0 == maxl || i < maxl; i++, cp++) {
1.153 kristaps 498: if ('\0' == *cp) {
499: mandoc_msg
500: (MANDOCERR_BADESCAPE,
501: r->parse, ln,
502: (int)(stesc - *bufp), NULL);
1.154 kristaps 503: return;
1.153 kristaps 504: }
1.94 kristaps 505: if (0 == maxl && ']' == *cp)
506: break;
507: }
508:
1.108 schwarze 509: /*
510: * Retrieve the replacement string; if it is
511: * undefined, resume searching for escapes.
512: */
513:
514: res = roff_getstrn(r, stnam, (size_t)i);
1.94 kristaps 515:
516: if (NULL == res) {
1.153 kristaps 517: mandoc_msg
518: (MANDOCERR_BADESCAPE, r->parse,
519: ln, (int)(stesc - *bufp), NULL);
1.142 kristaps 520: res = "";
1.94 kristaps 521: }
522:
1.108 schwarze 523: /* Replace the escape sequence by the string. */
524:
1.161 kristaps 525: pos = stesc - *bufp;
1.154 kristaps 526:
1.94 kristaps 527: nsz = *szp + strlen(res) + 1;
528: n = mandoc_malloc(nsz);
529:
1.108 schwarze 530: strlcpy(n, *bufp, (size_t)(stesc - *bufp + 1));
1.94 kristaps 531: strlcat(n, res, nsz);
532: strlcat(n, cp + (maxl ? 0 : 1), nsz);
533:
534: free(*bufp);
535:
536: *bufp = n;
537: *szp = nsz;
1.154 kristaps 538: goto again;
539: }
540: }
541:
542: /*
543: * Process text streams: convert all breakable hyphens into ASCII_HYPH.
544: */
545: static enum rofferr
546: roff_parsetext(char *p)
547: {
1.155 kristaps 548: char l, r;
1.154 kristaps 549: size_t sz;
550: const char *start;
551: enum mandoc_esc esc;
552:
553: start = p;
554:
555: while ('\0' != *p) {
556: sz = strcspn(p, "-\\");
557: p += sz;
558:
1.159 kristaps 559: if ('\0' == *p)
560: break;
561:
1.154 kristaps 562: if ('\\' == *p) {
563: /* Skip over escapes. */
564: p++;
565: esc = mandoc_escape
566: ((const char **)&p, NULL, NULL);
567: if (ESCAPE_ERROR == esc)
568: break;
1.155 kristaps 569: continue;
1.159 kristaps 570: } else if (p == start) {
1.158 kristaps 571: p++;
1.155 kristaps 572: continue;
1.158 kristaps 573: }
1.155 kristaps 574:
575: l = *(p - 1);
576: r = *(p + 1);
577: if ('\\' != l &&
578: '\t' != r && '\t' != l &&
579: ' ' != r && ' ' != l &&
580: '-' != r && '-' != l &&
581: ! isdigit((unsigned char)l) &&
1.159 kristaps 582: ! isdigit((unsigned char)r))
1.155 kristaps 583: *p = ASCII_HYPH;
584: p++;
1.94 kristaps 585: }
586:
1.154 kristaps 587: return(ROFF_CONT);
1.94 kristaps 588: }
589:
1.67 kristaps 590: enum rofferr
1.90 kristaps 591: roff_parseln(struct roff *r, int ln, char **bufp,
592: size_t *szp, int pos, int *offs)
1.67 kristaps 593: {
594: enum rofft t;
1.109 kristaps 595: enum rofferr e;
1.130 kristaps 596: int ppos, ctl;
1.79 kristaps 597:
598: /*
1.94 kristaps 599: * Run the reserved-word filter only if we have some reserved
600: * words to fill in.
601: */
602:
1.154 kristaps 603: roff_res(r, bufp, szp, ln, pos);
1.94 kristaps 604:
1.130 kristaps 605: ppos = pos;
606: ctl = mandoc_getcontrol(*bufp, &pos);
607:
1.94 kristaps 608: /*
1.79 kristaps 609: * First, if a scope is open and we're not a macro, pass the
610: * text through the macro's filter. If a scope isn't open and
611: * we're not a macro, just let it through.
1.125 kristaps 612: * Finally, if there's an equation scope open, divert it into it
613: * no matter our state.
1.79 kristaps 614: */
1.74 kristaps 615:
1.130 kristaps 616: if (r->last && ! ctl) {
1.78 kristaps 617: t = r->last->tok;
618: assert(roffs[t].text);
1.109 kristaps 619: e = (*roffs[t].text)
620: (r, t, bufp, szp, ln, pos, pos, offs);
621: assert(ROFF_IGN == e || ROFF_CONT == e);
1.125 kristaps 622: if (ROFF_CONT != e)
623: return(e);
624: if (r->eqn)
1.146 kristaps 625: return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
1.125 kristaps 626: if (r->tbl)
1.130 kristaps 627: return(tbl_read(r->tbl, ln, *bufp, pos));
1.154 kristaps 628: return(roff_parsetext(*bufp + pos));
1.130 kristaps 629: } else if ( ! ctl) {
1.125 kristaps 630: if (r->eqn)
1.146 kristaps 631: return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
1.109 kristaps 632: if (r->tbl)
1.130 kristaps 633: return(tbl_read(r->tbl, ln, *bufp, pos));
1.154 kristaps 634: return(roff_parsetext(*bufp + pos));
1.125 kristaps 635: } else if (r->eqn)
1.146 kristaps 636: return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
1.67 kristaps 637:
1.79 kristaps 638: /*
639: * If a scope is open, go to the child handler for that macro,
640: * as it may want to preprocess before doing anything with it.
1.125 kristaps 641: * Don't do so if an equation is open.
1.79 kristaps 642: */
1.78 kristaps 643:
1.79 kristaps 644: if (r->last) {
645: t = r->last->tok;
646: assert(roffs[t].sub);
647: return((*roffs[t].sub)
1.90 kristaps 648: (r, t, bufp, szp,
1.130 kristaps 649: ln, ppos, pos, offs));
1.79 kristaps 650: }
1.78 kristaps 651:
1.79 kristaps 652: /*
653: * Lastly, as we've no scope open, try to look up and execute
654: * the new macro. If no macro is found, simply return and let
655: * the compilers handle it.
656: */
1.67 kristaps 657:
1.106 kristaps 658: if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
1.79 kristaps 659: return(ROFF_CONT);
1.67 kristaps 660:
1.75 kristaps 661: assert(roffs[t].proc);
1.78 kristaps 662: return((*roffs[t].proc)
1.90 kristaps 663: (r, t, bufp, szp,
664: ln, ppos, pos, offs));
1.74 kristaps 665: }
666:
667:
1.117 kristaps 668: void
1.74 kristaps 669: roff_endparse(struct roff *r)
670: {
671:
1.110 kristaps 672: if (r->last)
1.128 kristaps 673: mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
1.109 kristaps 674: r->last->line, r->last->col, NULL);
1.117 kristaps 675:
1.125 kristaps 676: if (r->eqn) {
1.128 kristaps 677: mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
1.148 kristaps 678: r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
1.151 kristaps 679: eqn_end(&r->eqn);
1.125 kristaps 680: }
681:
1.117 kristaps 682: if (r->tbl) {
1.128 kristaps 683: mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
1.117 kristaps 684: r->tbl->line, r->tbl->pos, NULL);
1.151 kristaps 685: tbl_end(&r->tbl);
1.117 kristaps 686: }
1.67 kristaps 687: }
688:
689: /*
690: * Parse a roff node's type from the input buffer. This must be in the
691: * form of ".foo xxx" in the usual way.
692: */
693: static enum rofft
1.106 kristaps 694: roff_parse(struct roff *r, const char *buf, int *pos)
1.67 kristaps 695: {
1.106 kristaps 696: const char *mac;
697: size_t maclen;
1.67 kristaps 698: enum rofft t;
699:
1.144 kristaps 700: if ('\0' == buf[*pos] || '"' == buf[*pos] ||
701: '\t' == buf[*pos] || ' ' == buf[*pos])
1.67 kristaps 702: return(ROFF_MAX);
703:
1.144 kristaps 704: /*
705: * We stop the macro parse at an escape, tab, space, or nil.
706: * However, `\}' is also a valid macro, so make sure we don't
707: * clobber it by seeing the `\' as the end of token.
708: */
709:
1.106 kristaps 710: mac = buf + *pos;
1.144 kristaps 711: maclen = strcspn(mac + 1, " \\\t\0") + 1;
1.67 kristaps 712:
1.106 kristaps 713: t = (r->current_string = roff_getstrn(r, mac, maclen))
1.155 kristaps 714: ? ROFF_USERDEF : roffhash_find(mac, maclen);
1.67 kristaps 715:
1.127 kristaps 716: *pos += (int)maclen;
1.130 kristaps 717:
1.67 kristaps 718: while (buf[*pos] && ' ' == buf[*pos])
719: (*pos)++;
720:
721: return(t);
722: }
723:
724: /* ARGSUSED */
725: static enum rofferr
1.76 kristaps 726: roff_cblock(ROFF_ARGS)
1.67 kristaps 727: {
728:
1.79 kristaps 729: /*
730: * A block-close `..' should only be invoked as a child of an
731: * ignore macro, otherwise raise a warning and just ignore it.
732: */
733:
1.76 kristaps 734: if (NULL == r->last) {
1.128 kristaps 735: mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1.76 kristaps 736: return(ROFF_IGN);
737: }
1.67 kristaps 738:
1.81 kristaps 739: switch (r->last->tok) {
740: case (ROFF_am):
741: /* FALLTHROUGH */
742: case (ROFF_ami):
743: /* FALLTHROUGH */
744: case (ROFF_am1):
745: /* FALLTHROUGH */
746: case (ROFF_de):
1.108 schwarze 747: /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1.81 kristaps 748: /* FALLTHROUGH */
749: case (ROFF_dei):
750: /* FALLTHROUGH */
751: case (ROFF_ig):
752: break;
753: default:
1.128 kristaps 754: mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1.67 kristaps 755: return(ROFF_IGN);
1.76 kristaps 756: }
1.67 kristaps 757:
1.76 kristaps 758: if ((*bufp)[pos])
1.128 kristaps 759: mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
1.71 kristaps 760:
761: roffnode_pop(r);
1.76 kristaps 762: roffnode_cleanscope(r);
763: return(ROFF_IGN);
1.71 kristaps 764:
1.67 kristaps 765: }
766:
767:
1.76 kristaps 768: static void
769: roffnode_cleanscope(struct roff *r)
1.67 kristaps 770: {
771:
1.76 kristaps 772: while (r->last) {
773: if (--r->last->endspan < 0)
774: break;
775: roffnode_pop(r);
776: }
1.67 kristaps 777: }
778:
779:
1.75 kristaps 780: /* ARGSUSED */
1.74 kristaps 781: static enum rofferr
1.75 kristaps 782: roff_ccond(ROFF_ARGS)
1.74 kristaps 783: {
784:
1.76 kristaps 785: if (NULL == r->last) {
1.128 kristaps 786: mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1.76 kristaps 787: return(ROFF_IGN);
788: }
789:
1.82 kristaps 790: switch (r->last->tok) {
791: case (ROFF_el):
792: /* FALLTHROUGH */
793: case (ROFF_ie):
794: /* FALLTHROUGH */
795: case (ROFF_if):
796: break;
797: default:
1.128 kristaps 798: mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1.75 kristaps 799: return(ROFF_IGN);
800: }
801:
1.76 kristaps 802: if (r->last->endspan > -1) {
1.128 kristaps 803: mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1.76 kristaps 804: return(ROFF_IGN);
805: }
806:
807: if ((*bufp)[pos])
1.128 kristaps 808: mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
1.76 kristaps 809:
1.75 kristaps 810: roffnode_pop(r);
1.76 kristaps 811: roffnode_cleanscope(r);
812: return(ROFF_IGN);
813: }
814:
1.75 kristaps 815:
1.76 kristaps 816: /* ARGSUSED */
817: static enum rofferr
1.80 kristaps 818: roff_block(ROFF_ARGS)
1.76 kristaps 819: {
1.78 kristaps 820: int sv;
821: size_t sz;
1.106 kristaps 822: char *name;
823:
824: name = NULL;
1.76 kristaps 825:
1.106 kristaps 826: if (ROFF_ig != tok) {
827: if ('\0' == (*bufp)[pos]) {
1.128 kristaps 828: mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1.106 kristaps 829: return(ROFF_IGN);
830: }
1.107 kristaps 831:
832: /*
833: * Re-write `de1', since we don't really care about
834: * groff's strange compatibility mode, into `de'.
835: */
836:
1.106 kristaps 837: if (ROFF_de1 == tok)
838: tok = ROFF_de;
839: if (ROFF_de == tok)
840: name = *bufp + pos;
841: else
1.128 kristaps 842: mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
1.106 kristaps 843: roffs[tok].name);
1.107 kristaps 844:
1.131 schwarze 845: while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
1.80 kristaps 846: pos++;
1.107 kristaps 847:
1.131 schwarze 848: while (isspace((unsigned char)(*bufp)[pos]))
1.106 kristaps 849: (*bufp)[pos++] = '\0';
1.80 kristaps 850: }
851:
1.106 kristaps 852: roffnode_push(r, tok, name, ln, ppos);
853:
854: /*
855: * At the beginning of a `de' macro, clear the existing string
856: * with the same name, if there is one. New content will be
857: * added from roff_block_text() in multiline mode.
858: */
1.107 kristaps 859:
1.106 kristaps 860: if (ROFF_de == tok)
1.108 schwarze 861: roff_setstr(r, name, "", 0);
1.76 kristaps 862:
1.79 kristaps 863: if ('\0' == (*bufp)[pos])
1.78 kristaps 864: return(ROFF_IGN);
865:
1.107 kristaps 866: /* If present, process the custom end-of-line marker. */
867:
1.78 kristaps 868: sv = pos;
1.131 schwarze 869: while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
1.78 kristaps 870: pos++;
871:
872: /*
873: * Note: groff does NOT like escape characters in the input.
874: * Instead of detecting this, we're just going to let it fly and
875: * to hell with it.
876: */
877:
878: assert(pos > sv);
879: sz = (size_t)(pos - sv);
880:
1.79 kristaps 881: if (1 == sz && '.' == (*bufp)[sv])
882: return(ROFF_IGN);
883:
1.98 schwarze 884: r->last->end = mandoc_malloc(sz + 1);
1.78 kristaps 885:
886: memcpy(r->last->end, *bufp + sv, sz);
887: r->last->end[(int)sz] = '\0';
888:
1.77 kristaps 889: if ((*bufp)[pos])
1.128 kristaps 890: mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
1.74 kristaps 891:
1.78 kristaps 892: return(ROFF_IGN);
893: }
894:
895:
896: /* ARGSUSED */
897: static enum rofferr
1.80 kristaps 898: roff_block_sub(ROFF_ARGS)
1.79 kristaps 899: {
900: enum rofft t;
901: int i, j;
902:
903: /*
904: * First check whether a custom macro exists at this level. If
905: * it does, then check against it. This is some of groff's
906: * stranger behaviours. If we encountered a custom end-scope
907: * tag and that tag also happens to be a "real" macro, then we
908: * need to try interpreting it again as a real macro. If it's
909: * not, then return ignore. Else continue.
910: */
911:
912: if (r->last->end) {
1.130 kristaps 913: for (i = pos, j = 0; r->last->end[j]; j++, i++)
1.79 kristaps 914: if ((*bufp)[i] != r->last->end[j])
915: break;
916:
917: if ('\0' == r->last->end[j] &&
918: ('\0' == (*bufp)[i] ||
919: ' ' == (*bufp)[i] ||
920: '\t' == (*bufp)[i])) {
921: roffnode_pop(r);
922: roffnode_cleanscope(r);
923:
1.130 kristaps 924: while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
925: i++;
926:
927: pos = i;
1.106 kristaps 928: if (ROFF_MAX != roff_parse(r, *bufp, &pos))
1.79 kristaps 929: return(ROFF_RERUN);
930: return(ROFF_IGN);
931: }
932: }
933:
934: /*
935: * If we have no custom end-query or lookup failed, then try
936: * pulling it out of the hashtable.
937: */
938:
1.137 schwarze 939: t = roff_parse(r, *bufp, &pos);
1.79 kristaps 940:
1.106 kristaps 941: /*
942: * Macros other than block-end are only significant
943: * in `de' blocks; elsewhere, simply throw them away.
944: */
945: if (ROFF_cblock != t) {
946: if (ROFF_de == tok)
947: roff_setstr(r, r->last->name, *bufp + ppos, 1);
1.79 kristaps 948: return(ROFF_IGN);
1.106 kristaps 949: }
1.79 kristaps 950:
951: assert(roffs[t].proc);
1.90 kristaps 952: return((*roffs[t].proc)(r, t, bufp, szp,
953: ln, ppos, pos, offs));
1.79 kristaps 954: }
955:
956:
957: /* ARGSUSED */
958: static enum rofferr
1.80 kristaps 959: roff_block_text(ROFF_ARGS)
1.78 kristaps 960: {
961:
1.106 kristaps 962: if (ROFF_de == tok)
963: roff_setstr(r, r->last->name, *bufp + pos, 1);
964:
1.78 kristaps 965: return(ROFF_IGN);
966: }
967:
968:
969: /* ARGSUSED */
970: static enum rofferr
1.82 kristaps 971: roff_cond_sub(ROFF_ARGS)
972: {
973: enum rofft t;
974: enum roffrule rr;
1.139 kristaps 975: char *ep;
1.82 kristaps 976:
977: rr = r->last->rule;
1.139 kristaps 978: roffnode_cleanscope(r);
1.82 kristaps 979:
1.139 kristaps 980: /*
981: * If the macro is unknown, first check if it contains a closing
982: * delimiter `\}'. If it does, close out our scope and return
983: * the currently-scoped rule (ignore or continue). Else, drop
984: * into the currently-scoped rule.
1.87 kristaps 985: */
986:
1.106 kristaps 987: if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {
1.139 kristaps 988: ep = &(*bufp)[pos];
989: for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
990: ep++;
991: if ('}' != *ep)
992: continue;
1.144 kristaps 993:
994: /*
995: * Make the \} go away.
996: * This is a little haphazard, as it's not quite
997: * clear how nroff does this.
998: * If we're at the end of line, then just chop
999: * off the \} and resize the buffer.
1000: * If we aren't, then conver it to spaces.
1001: */
1002:
1003: if ('\0' == *(ep + 1)) {
1004: *--ep = '\0';
1005: *szp -= 2;
1006: } else
1007: *(ep - 1) = *ep = ' ';
1008:
1.139 kristaps 1009: roff_ccond(r, ROFF_ccond, bufp, szp,
1010: ln, pos, pos + 2, offs);
1011: break;
1012: }
1.82 kristaps 1013: return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1.100 kristaps 1014: }
1.82 kristaps 1015:
1016: /*
1017: * A denied conditional must evaluate its children if and only
1018: * if they're either structurally required (such as loops and
1019: * conditionals) or a closing macro.
1020: */
1.139 kristaps 1021:
1.82 kristaps 1022: if (ROFFRULE_DENY == rr)
1023: if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
1024: if (ROFF_ccond != t)
1025: return(ROFF_IGN);
1026:
1027: assert(roffs[t].proc);
1.90 kristaps 1028: return((*roffs[t].proc)(r, t, bufp, szp,
1029: ln, ppos, pos, offs));
1.82 kristaps 1030: }
1031:
1032: /* ARGSUSED */
1033: static enum rofferr
1034: roff_cond_text(ROFF_ARGS)
1.78 kristaps 1035: {
1.140 kristaps 1036: char *ep;
1.82 kristaps 1037: enum roffrule rr;
1038:
1039: rr = r->last->rule;
1.140 kristaps 1040: roffnode_cleanscope(r);
1.82 kristaps 1041:
1.140 kristaps 1042: ep = &(*bufp)[pos];
1043: for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
1044: ep++;
1045: if ('}' != *ep)
1046: continue;
1047: *ep = '&';
1048: roff_ccond(r, ROFF_ccond, bufp, szp,
1049: ln, pos, pos + 2, offs);
1.78 kristaps 1050: }
1.82 kristaps 1051: return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1.74 kristaps 1052: }
1053:
1.88 kristaps 1054: static enum roffrule
1055: roff_evalcond(const char *v, int *pos)
1056: {
1057:
1058: switch (v[*pos]) {
1059: case ('n'):
1060: (*pos)++;
1061: return(ROFFRULE_ALLOW);
1062: case ('e'):
1063: /* FALLTHROUGH */
1064: case ('o'):
1065: /* FALLTHROUGH */
1066: case ('t'):
1067: (*pos)++;
1068: return(ROFFRULE_DENY);
1069: default:
1070: break;
1071: }
1072:
1073: while (v[*pos] && ' ' != v[*pos])
1074: (*pos)++;
1075: return(ROFFRULE_DENY);
1076: }
1077:
1.75 kristaps 1078: /* ARGSUSED */
1.74 kristaps 1079: static enum rofferr
1.103 kristaps 1080: roff_line_ignore(ROFF_ARGS)
1.89 kristaps 1081: {
1.123 schwarze 1082:
1083: if (ROFF_it == tok)
1.128 kristaps 1084: mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos, "it");
1.89 kristaps 1085:
1086: return(ROFF_IGN);
1087: }
1088:
1.104 kristaps 1089: /* ARGSUSED */
1090: static enum rofferr
1.82 kristaps 1091: roff_cond(ROFF_ARGS)
1.74 kristaps 1092: {
1.77 kristaps 1093: int sv;
1.88 kristaps 1094: enum roffrule rule;
1.74 kristaps 1095:
1.134 kristaps 1096: /*
1097: * An `.el' has no conditional body: it will consume the value
1098: * of the current rstack entry set in prior `ie' calls or
1099: * defaults to DENY.
1100: *
1101: * If we're not an `el', however, then evaluate the conditional.
1102: */
1.133 kristaps 1103:
1.134 kristaps 1104: rule = ROFF_el == tok ?
1105: (r->rstackpos < 0 ?
1106: ROFFRULE_DENY : r->rstack[r->rstackpos--]) :
1107: roff_evalcond(*bufp, &pos);
1.77 kristaps 1108:
1109: sv = pos;
1.75 kristaps 1110: while (' ' == (*bufp)[pos])
1111: pos++;
1.74 kristaps 1112:
1.77 kristaps 1113: /*
1114: * Roff is weird. If we have just white-space after the
1115: * conditional, it's considered the BODY and we exit without
1116: * really doing anything. Warn about this. It's probably
1117: * wrong.
1118: */
1.88 kristaps 1119:
1.77 kristaps 1120: if ('\0' == (*bufp)[pos] && sv != pos) {
1.128 kristaps 1121: mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1.107 kristaps 1122: return(ROFF_IGN);
1.77 kristaps 1123: }
1124:
1.106 kristaps 1125: roffnode_push(r, tok, NULL, ln, ppos);
1.77 kristaps 1126:
1.88 kristaps 1127: r->last->rule = rule;
1128:
1.134 kristaps 1129: /*
1130: * An if-else will put the NEGATION of the current evaluated
1131: * conditional into the stack of rules.
1132: */
1133:
1.84 schwarze 1134: if (ROFF_ie == tok) {
1.134 kristaps 1135: if (r->rstackpos == RSTACK_MAX - 1) {
1136: mandoc_msg(MANDOCERR_MEM,
1137: r->parse, ln, ppos, NULL);
1138: return(ROFF_ERR);
1139: }
1140: r->rstack[++r->rstackpos] =
1141: ROFFRULE_DENY == r->last->rule ?
1142: ROFFRULE_ALLOW : ROFFRULE_DENY;
1.82 kristaps 1143: }
1.88 kristaps 1144:
1145: /* If the parent has false as its rule, then so do we. */
1146:
1.109 kristaps 1147: if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1.84 schwarze 1148: r->last->rule = ROFFRULE_DENY;
1.88 kristaps 1149:
1150: /*
1151: * Determine scope. If we're invoked with "\{" trailing the
1152: * conditional, then we're in a multiline scope. Else our scope
1153: * expires on the next line.
1154: */
1.74 kristaps 1155:
1.75 kristaps 1156: r->last->endspan = 1;
1157:
1158: if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1159: r->last->endspan = -1;
1160: pos += 2;
1.109 kristaps 1161: }
1.74 kristaps 1162:
1.77 kristaps 1163: /*
1164: * If there are no arguments on the line, the next-line scope is
1165: * assumed.
1166: */
1167:
1.75 kristaps 1168: if ('\0' == (*bufp)[pos])
1169: return(ROFF_IGN);
1.77 kristaps 1170:
1171: /* Otherwise re-run the roff parser after recalculating. */
1.74 kristaps 1172:
1.75 kristaps 1173: *offs = pos;
1174: return(ROFF_RERUN);
1.83 schwarze 1175: }
1176:
1177:
1178: /* ARGSUSED */
1179: static enum rofferr
1.92 schwarze 1180: roff_ds(ROFF_ARGS)
1181: {
1.96 kristaps 1182: char *name, *string;
1183:
1184: /*
1185: * A symbol is named by the first word following the macro
1186: * invocation up to a space. Its value is anything after the
1187: * name's trailing whitespace and optional double-quote. Thus,
1188: *
1189: * [.ds foo "bar " ]
1190: *
1191: * will have `bar " ' as its value.
1192: */
1.92 schwarze 1193:
1.121 schwarze 1194: string = *bufp + pos;
1195: name = roff_getname(r, &string, ln, pos);
1.92 schwarze 1196: if ('\0' == *name)
1197: return(ROFF_IGN);
1198:
1.121 schwarze 1199: /* Read past initial double-quote. */
1200: if ('"' == *string)
1.92 schwarze 1201: string++;
1202:
1.96 kristaps 1203: /* The rest is the value. */
1.106 kristaps 1204: roff_setstr(r, name, string, 0);
1.92 schwarze 1205: return(ROFF_IGN);
1206: }
1207:
1.147 kristaps 1208: int
1209: roff_regisset(const struct roff *r, enum regs reg)
1210: {
1211:
1212: return(r->regs[(int)reg].set);
1213: }
1214:
1215: unsigned int
1216: roff_regget(const struct roff *r, enum regs reg)
1217: {
1218:
1219: return(r->regs[(int)reg].u);
1220: }
1221:
1222: void
1223: roff_regunset(struct roff *r, enum regs reg)
1224: {
1225:
1226: r->regs[(int)reg].set = 0;
1227: }
1.92 schwarze 1228:
1229: /* ARGSUSED */
1230: static enum rofferr
1.89 kristaps 1231: roff_nr(ROFF_ARGS)
1.83 schwarze 1232: {
1.121 schwarze 1233: const char *key;
1234: char *val;
1.138 kristaps 1235: int iv;
1.89 kristaps 1236:
1.121 schwarze 1237: val = *bufp + pos;
1238: key = roff_getname(r, &val, ln, pos);
1.89 kristaps 1239:
1240: if (0 == strcmp(key, "nS")) {
1.147 kristaps 1241: r->regs[(int)REG_nS].set = 1;
1.149 kristaps 1242: if ((iv = mandoc_strntoi(val, strlen(val), 10)) >= 0)
1.147 kristaps 1243: r->regs[(int)REG_nS].u = (unsigned)iv;
1.138 kristaps 1244: else
1.147 kristaps 1245: r->regs[(int)REG_nS].u = 0u;
1.109 kristaps 1246: }
1247:
1.122 schwarze 1248: return(ROFF_IGN);
1249: }
1250:
1251: /* ARGSUSED */
1252: static enum rofferr
1253: roff_rm(ROFF_ARGS)
1254: {
1255: const char *name;
1256: char *cp;
1257:
1258: cp = *bufp + pos;
1259: while ('\0' != *cp) {
1.127 kristaps 1260: name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1.122 schwarze 1261: if ('\0' != *name)
1262: roff_setstr(r, name, NULL, 0);
1263: }
1.109 kristaps 1264: return(ROFF_IGN);
1265: }
1266:
1267: /* ARGSUSED */
1268: static enum rofferr
1269: roff_TE(ROFF_ARGS)
1270: {
1271:
1272: if (NULL == r->tbl)
1.128 kristaps 1273: mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1.115 kristaps 1274: else
1.151 kristaps 1275: tbl_end(&r->tbl);
1.109 kristaps 1276:
1.112 kristaps 1277: return(ROFF_IGN);
1278: }
1279:
1280: /* ARGSUSED */
1281: static enum rofferr
1282: roff_T_(ROFF_ARGS)
1283: {
1284:
1285: if (NULL == r->tbl)
1.128 kristaps 1286: mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1.112 kristaps 1287: else
1.116 kristaps 1288: tbl_restart(ppos, ln, r->tbl);
1.112 kristaps 1289:
1.109 kristaps 1290: return(ROFF_IGN);
1291: }
1292:
#if 0
/*
 * End the currently open equation, if there is one.
 * Returns 1 when an equation was open and eqn_end() reported
 * ROFF_EQN, 0 otherwise.  Currently compiled out.
 */
static int
roff_closeeqn(struct roff *r)
{

	if (r->eqn == NULL)
		return 0;

	return eqn_end(&r->eqn) == ROFF_EQN ? 1 : 0;
}
#endif
1.151 kristaps 1301:
1.156 kristaps 1302: static void
1.151 kristaps 1303: roff_openeqn(struct roff *r, const char *name, int line,
1304: int offs, const char *buf)
1.125 kristaps 1305: {
1.151 kristaps 1306: struct eqn_node *e;
1307: int poff;
1.125 kristaps 1308:
1309: assert(NULL == r->eqn);
1.151 kristaps 1310: e = eqn_alloc(name, offs, line, r->parse);
1.125 kristaps 1311:
1312: if (r->last_eqn)
1313: r->last_eqn->next = e;
1314: else
1315: r->first_eqn = r->last_eqn = e;
1316:
1317: r->eqn = r->last_eqn = e;
1.151 kristaps 1318:
1319: if (buf) {
1320: poff = 0;
1321: eqn_read(&r->eqn, line, buf, offs, &poff);
1322: }
1323: }
1324:
1325: /* ARGSUSED */
1326: static enum rofferr
1327: roff_EQ(ROFF_ARGS)
1328: {
1329:
1330: roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1.125 kristaps 1331: return(ROFF_IGN);
1332: }
1333:
1334: /* ARGSUSED */
1335: static enum rofferr
1336: roff_EN(ROFF_ARGS)
1337: {
1338:
1.128 kristaps 1339: mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1.125 kristaps 1340: return(ROFF_IGN);
1341: }
1342:
1343: /* ARGSUSED */
1344: static enum rofferr
1.109 kristaps 1345: roff_TS(ROFF_ARGS)
1346: {
1.118 kristaps 1347: struct tbl_node *t;
1.89 kristaps 1348:
1.115 kristaps 1349: if (r->tbl) {
1.128 kristaps 1350: mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1.151 kristaps 1351: tbl_end(&r->tbl);
1.115 kristaps 1352: }
1.83 schwarze 1353:
1.128 kristaps 1354: t = tbl_alloc(ppos, ln, r->parse);
1.113 kristaps 1355:
1356: if (r->last_tbl)
1357: r->last_tbl->next = t;
1358: else
1359: r->first_tbl = r->last_tbl = t;
1360:
1361: r->tbl = r->last_tbl = t;
1.83 schwarze 1362: return(ROFF_IGN);
1.92 schwarze 1363: }
1364:
1.105 kristaps 1365: /* ARGSUSED */
1366: static enum rofferr
1.164 kristaps 1367: roff_tr(ROFF_ARGS)
1368: {
1369: const char *p, *first, *second;
1370: size_t fsz, ssz;
1371: enum mandoc_esc esc;
1372:
1373: p = *bufp + pos;
1374:
1375: if ('\0' == *p) {
1376: mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1377: return(ROFF_IGN);
1378: }
1379:
1380: while ('\0' != *p) {
1381: fsz = ssz = 1;
1382:
1383: first = p++;
1384: if ('\\' == *first) {
1385: esc = mandoc_escape(&p, NULL, NULL);
1386: if (ESCAPE_ERROR == esc) {
1387: mandoc_msg
1388: (MANDOCERR_BADESCAPE, r->parse,
1389: ln, (int)(p - *bufp), NULL);
1390: return(ROFF_IGN);
1391: }
1392: fsz = (size_t)(p - first);
1393: }
1394:
1395: second = p++;
1396: if ('\\' == *second) {
1397: esc = mandoc_escape(&p, NULL, NULL);
1398: if (ESCAPE_ERROR == esc) {
1399: mandoc_msg
1400: (MANDOCERR_BADESCAPE, r->parse,
1401: ln, (int)(p - *bufp), NULL);
1402: return(ROFF_IGN);
1403: }
1404: ssz = (size_t)(p - second);
1.165 kristaps 1405: } else if ('\0' == *second) {
1.164 kristaps 1406: mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1407: ln, (int)(p - *bufp), NULL);
1408: second = " ";
1.165 kristaps 1409: p--;
1.164 kristaps 1410: }
1411:
1.167 ! kristaps 1412: if (fsz > 1) {
! 1413: roff_setstrn(&r->xmbtab, first,
! 1414: fsz, second, ssz, 0);
! 1415: continue;
! 1416: }
! 1417:
! 1418: if (NULL == r->xtab)
! 1419: r->xtab = mandoc_calloc
! 1420: (128, sizeof(struct roffstr));
! 1421:
! 1422: free(r->xtab[(int)*first].p);
! 1423: r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
! 1424: r->xtab[(int)*first].sz = ssz;
1.164 kristaps 1425: }
1426:
1427: return(ROFF_IGN);
1428: }
1429:
1430: /* ARGSUSED */
1431: static enum rofferr
1.105 kristaps 1432: roff_so(ROFF_ARGS)
1433: {
1434: char *name;
1435:
1.128 kristaps 1436: mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1.105 kristaps 1437:
1438: /*
1439: * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1440: * opening anything that's not in our cwd or anything beneath
1441: * it. Thus, explicitly disallow traversing up the file-system
1442: * or using absolute paths.
1443: */
1444:
1445: name = *bufp + pos;
1446: if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1.128 kristaps 1447: mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1.105 kristaps 1448: return(ROFF_ERR);
1449: }
1450:
1451: *offs = pos;
1452: return(ROFF_SO);
1453: }
1.92 schwarze 1454:
1.106 kristaps 1455: /* ARGSUSED */
1456: static enum rofferr
1457: roff_userdef(ROFF_ARGS)
1.99 kristaps 1458: {
1.106 kristaps 1459: const char *arg[9];
1460: char *cp, *n1, *n2;
1.119 schwarze 1461: int i;
1.106 kristaps 1462:
1463: /*
1464: * Collect pointers to macro argument strings
1465: * and null-terminate them.
1466: */
1467: cp = *bufp + pos;
1.119 schwarze 1468: for (i = 0; i < 9; i++)
1.120 schwarze 1469: arg[i] = '\0' == *cp ? "" :
1.136 kristaps 1470: mandoc_getarg(r->parse, &cp, ln, &pos);
1.99 kristaps 1471:
1.106 kristaps 1472: /*
1473: * Expand macro arguments.
1.99 kristaps 1474: */
1.106 kristaps 1475: *szp = 0;
1476: n1 = cp = mandoc_strdup(r->current_string);
1477: while (NULL != (cp = strstr(cp, "\\$"))) {
1478: i = cp[2] - '1';
1479: if (0 > i || 8 < i) {
1480: /* Not an argument invocation. */
1481: cp += 2;
1482: continue;
1483: }
1484:
1485: *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1486: n2 = mandoc_malloc(*szp);
1487:
1488: strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1489: strlcat(n2, arg[i], *szp);
1490: strlcat(n2, cp + 3, *szp);
1491:
1492: cp = n2 + (cp - n1);
1493: free(n1);
1494: n1 = n2;
1.99 kristaps 1495: }
1496:
1.106 kristaps 1497: /*
1498: * Replace the macro invocation
1499: * by the expanded macro.
1500: */
1501: free(*bufp);
1502: *bufp = n1;
1503: if (0 == *szp)
1504: *szp = strlen(*bufp) + 1;
1505:
1506: return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1507: ROFF_REPARSE : ROFF_APPEND);
1.99 kristaps 1508: }
1.121 schwarze 1509:
1510: static char *
1511: roff_getname(struct roff *r, char **cpp, int ln, int pos)
1512: {
1513: char *name, *cp;
1514:
1515: name = *cpp;
1516: if ('\0' == *name)
1517: return(name);
1518:
1519: /* Read until end of name. */
1520: for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1521: if ('\\' != *cp)
1522: continue;
1523: cp++;
1524: if ('\\' == *cp)
1525: continue;
1.128 kristaps 1526: mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1.121 schwarze 1527: *cp = '\0';
1528: name = cp;
1529: }
1530:
1531: /* Nil-terminate name. */
1532: if ('\0' != *cp)
1533: *(cp++) = '\0';
1534:
1535: /* Read past spaces. */
1536: while (' ' == *cp)
1537: cp++;
1538:
1539: *cpp = cp;
1540: return(name);
1541: }
1542:
1.106 kristaps 1543: /*
1544: * Store *string into the user-defined string called *name.
1545: * In multiline mode, append to an existing entry and append '\n';
1546: * else replace the existing entry, if there is one.
1547: * To clear an existing entry, call with (*r, *name, NULL, 0).
1548: */
1.94 kristaps 1549: static void
1.106 kristaps 1550: roff_setstr(struct roff *r, const char *name, const char *string,
1551: int multiline)
1.92 schwarze 1552: {
1.164 kristaps 1553:
1554: roff_setstrn(&r->strtab, name, strlen(name), string,
1555: string ? strlen(string) : 0, multiline);
1556: }
1557:
1558: static void
1.166 kristaps 1559: roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
1.164 kristaps 1560: const char *string, size_t stringsz, int multiline)
1561: {
1.166 kristaps 1562: struct roffkv *n;
1.164 kristaps 1563: char *c;
1564: int i;
1565: size_t oldch, newch;
1.92 schwarze 1566:
1.106 kristaps 1567: /* Search for an existing string with the same name. */
1.164 kristaps 1568: n = *r;
1569:
1.166 kristaps 1570: while (n && strcmp(name, n->key.p))
1.92 schwarze 1571: n = n->next;
1.94 kristaps 1572:
1573: if (NULL == n) {
1.106 kristaps 1574: /* Create a new string table entry. */
1.166 kristaps 1575: n = mandoc_malloc(sizeof(struct roffkv));
1576: n->key.p = mandoc_strndup(name, namesz);
1577: n->key.sz = namesz;
1578: n->val.p = NULL;
1579: n->val.sz = 0;
1.164 kristaps 1580: n->next = *r;
1581: *r = n;
1.106 kristaps 1582: } else if (0 == multiline) {
1583: /* In multiline mode, append; else replace. */
1.166 kristaps 1584: free(n->val.p);
1585: n->val.p = NULL;
1586: n->val.sz = 0;
1.106 kristaps 1587: }
1588:
1589: if (NULL == string)
1590: return;
1591:
1592: /*
1593: * One additional byte for the '\n' in multiline mode,
1594: * and one for the terminating '\0'.
1595: */
1.164 kristaps 1596: newch = stringsz + (multiline ? 2u : 1u);
1597:
1.166 kristaps 1598: if (NULL == n->val.p) {
1599: n->val.p = mandoc_malloc(newch);
1600: *n->val.p = '\0';
1.106 kristaps 1601: oldch = 0;
1602: } else {
1.166 kristaps 1603: oldch = n->val.sz;
1604: n->val.p = mandoc_realloc(n->val.p, oldch + newch);
1.106 kristaps 1605: }
1606:
1607: /* Skip existing content in the destination buffer. */
1.166 kristaps 1608: c = n->val.p + (int)oldch;
1.106 kristaps 1609:
1610: /* Append new content to the destination buffer. */
1.164 kristaps 1611: i = 0;
1612: while (i < (int)stringsz) {
1.106 kristaps 1613: /*
1614: * Rudimentary roff copy mode:
1615: * Handle escaped backslashes.
1616: */
1.164 kristaps 1617: if ('\\' == string[i] && '\\' == string[i + 1])
1618: i++;
1619: *c++ = string[i++];
1.106 kristaps 1620: }
1.94 kristaps 1621:
1.106 kristaps 1622: /* Append terminating bytes. */
1623: if (multiline)
1624: *c++ = '\n';
1.163 kristaps 1625:
1.106 kristaps 1626: *c = '\0';
1.166 kristaps 1627: n->val.sz = (int)(c - n->val.p);
1.92 schwarze 1628: }
1629:
1.94 kristaps 1630: static const char *
1631: roff_getstrn(const struct roff *r, const char *name, size_t len)
1.92 schwarze 1632: {
1.166 kristaps 1633: const struct roffkv *n;
1.92 schwarze 1634:
1.164 kristaps 1635: for (n = r->strtab; n; n = n->next)
1.166 kristaps 1636: if (0 == strncmp(name, n->key.p, len) &&
1637: '\0' == n->key.p[(int)len])
1638: return(n->val.p);
1.94 kristaps 1639:
1.157 kristaps 1640: return(NULL);
1.92 schwarze 1641: }
1642:
1.94 kristaps 1643: static void
1.167 ! kristaps 1644: roff_freestr(struct roffkv *r)
1.92 schwarze 1645: {
1.166 kristaps 1646: struct roffkv *n, *nn;
1.92 schwarze 1647:
1.167 ! kristaps 1648: for (n = r; n; n = nn) {
1.166 kristaps 1649: free(n->key.p);
1650: free(n->val.p);
1.92 schwarze 1651: nn = n->next;
1652: free(n);
1653: }
1.114 kristaps 1654: }
1655:
1656: const struct tbl_span *
1657: roff_span(const struct roff *r)
1658: {
1659:
1660: return(r->tbl ? tbl_span(r->tbl) : NULL);
1.125 kristaps 1661: }
1662:
1663: const struct eqn *
1664: roff_eqn(const struct roff *r)
1665: {
1666:
1667: return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1.151 kristaps 1668: }
1669:
/*
 * Return the equation delimiter character.
 * This always yields '\0'; the parser state is not consulted.
 */
char
roff_eqndelim(const struct roff *r)
{
	const char	 delim = '\0';

	(void)r;
	return delim;
}
1676:
1677: /*
1678: * Duplicate an input string, making the appropriate character
1679: * conversations (as stipulated by `tr') along the way.
1680: * Returns a heap-allocated string with all the replacements made.
1681: */
1682: char *
1683: roff_strdup(const struct roff *r, const char *p)
1684: {
1.166 kristaps 1685: const struct roffkv *cp;
1.164 kristaps 1686: char *res;
1687: const char *pp;
1688: size_t ssz, sz;
1689: enum mandoc_esc esc;
1690:
1.167 ! kristaps 1691: if (NULL == r->xmbtab && NULL == r->xtab)
1.164 kristaps 1692: return(mandoc_strdup(p));
1693: else if ('\0' == *p)
1694: return(mandoc_strdup(""));
1695:
1696: /*
1697: * Step through each character looking for term matches
1698: * (remember that a `tr' can be invoked with an escape, which is
1699: * a glyph but the escape is multi-character).
1700: * We only do this if the character hash has been initialised
1701: * and the string is >0 length.
1702: */
1703:
1704: res = NULL;
1705: ssz = 0;
1706:
1707: while ('\0' != *p) {
1.167 ! kristaps 1708: if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
! 1709: sz = r->xtab[(int)*p].sz;
! 1710: res = mandoc_realloc(res, ssz + sz + 1);
! 1711: memcpy(res + ssz, r->xtab[(int)*p].p, sz);
! 1712: ssz += sz;
! 1713: p++;
! 1714: continue;
! 1715: } else if ('\\' != *p) {
! 1716: res = mandoc_realloc(res, ssz + 2);
! 1717: res[ssz++] = *p++;
! 1718: continue;
! 1719: }
! 1720:
1.164 kristaps 1721: /* Search for term matches. */
1.167 ! kristaps 1722: for (cp = r->xmbtab; cp; cp = cp->next)
1.166 kristaps 1723: if (0 == strncmp(p, cp->key.p, cp->key.sz))
1.164 kristaps 1724: break;
1725:
1726: if (NULL != cp) {
1727: /*
1728: * A match has been found.
1729: * Append the match to the array and move
1730: * forward by its keysize.
1731: */
1.166 kristaps 1732: res = mandoc_realloc
1733: (res, ssz + cp->val.sz + 1);
1734: memcpy(res + ssz, cp->val.p, cp->val.sz);
1735: ssz += cp->val.sz;
1736: p += (int)cp->key.sz;
1.164 kristaps 1737: continue;
1738: }
1739:
1.167 ! kristaps 1740: /*
! 1741: * Handle escapes carefully: we need to copy
! 1742: * over just the escape itself, or else we might
! 1743: * do replacements within the escape itself.
! 1744: * Make sure to pass along the bogus string.
! 1745: */
! 1746: pp = p++;
! 1747: esc = mandoc_escape(&p, NULL, NULL);
! 1748: if (ESCAPE_ERROR == esc) {
! 1749: sz = strlen(pp);
1.164 kristaps 1750: res = mandoc_realloc(res, ssz + sz + 1);
1751: memcpy(res + ssz, pp, sz);
1.167 ! kristaps 1752: break;
1.164 kristaps 1753: }
1.167 ! kristaps 1754: /*
! 1755: * We bail out on bad escapes.
! 1756: * No need to warn: we already did so when
! 1757: * roff_res() was called.
! 1758: */
! 1759: sz = (int)(p - pp);
! 1760: res = mandoc_realloc(res, ssz + sz + 1);
! 1761: memcpy(res + ssz, pp, sz);
! 1762: ssz += sz;
1.164 kristaps 1763: }
1764:
1765: res[(int)ssz] = '\0';
1766: return(res);
1.74 kristaps 1767: }
CVSweb