Annotation of texi2mdoc/util.c, Revision 1.32
1.32 ! kristaps 1: /* $Id: util.c,v 1.31 2015/03/12 04:24:19 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2015 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/mman.h>
18: #include <sys/stat.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <limits.h>
24: #include <stdarg.h>
25: #include <stdio.h>
26: #include <stdlib.h>
27: #include <string.h>
28: #include <time.h>
29: #include <unistd.h>
30:
31: #include "extern.h"
32:
/*
 * Names of the mdoc(7) macros.
 * parseword() consults this table so that a word on a macro line that
 * happens to spell a macro name can be escaped.
 * Every entry MUST be exactly two or three characters long.
 * The table is terminated by a NULL sentinel.
 */
static const char *const mdocs[] = {
	"Ap",  "Dd",  "Dt",  "Os",  "Sh",  "Ss",  "Pp",  "D1",
	"Dl",  "Bd",  "Ed",  "Bl",  "El",  "It",  "Ad",  "An",
	"Ar",  "Cd",  "Cm",  "Dv",  "Er",  "Ev",  "Ex",  "Fa",
	"Fd",  "Fl",  "Fn",  "Ft",  "Ic",  "In",  "Li",  "Nd",
	"Nm",  "Op",  "Ot",  "Pa",  "Rv",  "St",  "Va",  "Vt",
	"Xr",  "%A",  "%B",  "%D",  "%I",  "%J",  "%N",  "%O",
	"%P",  "%R",  "%T",  "%V",  "Ac",  "Ao",  "Aq",  "At",
	"Bc",  "Bf",  "Bo",  "Bq",  "Bsx", "Bx",  "Db",  "Dc",
	"Do",  "Dq",  "Ec",  "Ef",  "Em",  "Eo",  "Fx",  "Ms",
	"No",  "Ns",  "Nx",  "Ox",  "Pc",  "Pf",  "Po",  "Pq",
	"Qc",  "Ql",  "Qo",  "Qq",  "Re",  "Rs",  "Sc",  "So",
	"Sq",  "Sm",  "Sx",  "Sy",  "Tn",  "Ux",  "Xc",  "Xo",
	"Fo",  "Fc",  "Oo",  "Oc",  "Bk",  "Ek",  "Bt",  "Hf",
	"Fr",  "Ud",  "Lb",  "Lp",  "Lk",  "Mt",  "Brq", "Bro",
	"Brc", "%C",  "Es",  "En",  "Dx",  "%Q",  "br",  "sp",
	"%U",  "Ta",  "ll",
	NULL,
};
70:
71: /*
1.1 kristaps 72: * Unmap the top-most file in the stack of files currently opened (that
73: * is, nested calls to parsefile()).
74: */
75: void
76: texifilepop(struct texi *p)
77: {
78: struct texifile *f;
79:
80: assert(p->filepos > 0);
81: f = &p->files[--p->filepos];
1.14 kristaps 82: free(f->map);
1.1 kristaps 83: }
84:
1.7 kristaps 85: static void
86: teximacrofree(struct teximacro *p)
87: {
88: size_t i;
89:
90: for (i = 0; i < p->argsz; i++)
91: free(p->args[i]);
92:
93: free(p->args);
94: free(p->key);
95: free(p->value);
96: }
97:
98: static void
99: texivaluefree(struct texivalue *p)
100: {
101:
102: free(p->key);
103: free(p->value);
104: }
105:
1.30 kristaps 106: static void
107: texidex_free(struct texidex *p)
108: {
109: size_t i;
110:
111: for (i = 0; i < p->indexsz; i++)
1.31 kristaps 112: free(p->index[i].term);
1.30 kristaps 113:
114: free(p->index);
115: free(p->name);
116: p->index = NULL;
117: p->indexsz = 0;
118: }
119:
120: /*
121: * Add the text beginning at "index" and of "sz" bytes to the index
122: * named "tok" with name size "toksz".
123: * This will also output the necessary mdoc(7) to construct the index.
124: */
125: void
126: texindex(struct texi *p, const char *tok,
127: size_t toksz, const char *index, size_t sz)
128: {
1.31 kristaps 129: size_t i, isz;
1.30 kristaps 130: #ifdef HAVE_INDEX
131: char *cp;
132: #endif
133:
134: if (0 == sz) {
135: texiwarn(p, "zero-length index entry");
136: return;
137: }
138:
139: /* Look for the index. (Must be found.) */
140: for (i = 0; i < p->indexsz; i++) {
141: if (strlen(p->indexs[i].name) != toksz)
142: continue;
143: if (strncmp(p->indexs[i].name, tok, toksz))
144: continue;
145: break;
146: }
147:
148: assert(i < p->indexsz);
1.31 kristaps 149: isz = p->indexs[i].indexsz;
1.30 kristaps 150: /* Reallocate index's terms. */
151: p->indexs[i].index = realloc
152: (p->indexs[i].index,
1.31 kristaps 153: (isz + 1) * sizeof(struct texiterm));
1.30 kristaps 154: if (NULL == p->indexs[i].index)
155: texiabort(p, NULL);
156:
157: /* Add term to term array. */
1.32 ! kristaps 158: p->indexs[i].index[isz].chapter = p->nodecur;
1.31 kristaps 159: p->indexs[i].index[isz].term = malloc(sz + 1);
160: if (NULL == p->indexs[i].index[isz].term)
1.30 kristaps 161: texiabort(p, NULL);
1.31 kristaps 162: memcpy(p->indexs[i].index[isz].term, index, sz);
163: p->indexs[i].index[isz].term[sz] = '\0';
1.30 kristaps 164:
165: /* Output mdoc(7) for index. */
166: #ifdef HAVE_INDEX
167: p->seenvs = -1;
168: teximacroopen(p, "Ix");
169: texiputchars(p, "idx");
170: texiputchars(p, p->indexs[i].name);
1.31 kristaps 171: cp = p->indexs[i].index[isz].term;
1.32 ! kristaps 172: while ('\n' != *cp)
1.30 kristaps 173: texiputchar(p, *cp++);
174: teximacroclose(p);
175: #endif
176: p->indexs[i].indexsz++;
177: }
178:
179: /*
180: * Add an index entry named "tok" of length "sz".
181: * This usually consists of two letters, e.g., "cp" or "vr".
182: * This does nothing if the index exists or is zero-sized.
183: */
184: void
185: texindex_add(struct texi *p, const char *tok, size_t sz)
186: {
187: size_t i;
188: char *cp;
189:
190: if (0 == sz)
191: return;
192:
193: /* Make sure we don't have a duplicate. */
194: for (i = 0; i < p->indexsz; i++) {
195: if (strlen(p->indexs[i].name) != sz)
196: continue;
197: if (strncmp(p->indexs[i].name, tok, sz))
198: continue;
199: return;
200: }
201:
202: /* Reallocate indices. */
203: p->indexs = realloc(p->indexs,
1.31 kristaps 204: sizeof(struct texidex) *
205: (p->indexsz + 1));
1.30 kristaps 206: if (NULL == p->indexs)
207: texiabort(p, NULL);
208: if (NULL == (cp = malloc(sz + 1)))
209: texiabort(p, NULL);
210: memcpy(cp, tok, sz);
211: cp[sz] = '\0';
212: p->indexs[p->indexsz].name = cp;
213: p->indexs[p->indexsz].index = NULL;
214: p->indexs[p->indexsz].indexsz = 0;
215: p->indexsz++;
216: }
217:
1.1 kristaps 218: /*
219: * Unmap all files that we're currently using and free all resources
220: * that we've allocated during the parse.
221: * The utility should exit(...) after this is called.
222: */
223: void
224: texiexit(struct texi *p)
225: {
226: size_t i;
227:
228: /* Make sure we're newline-terminated. */
229: if (p->outcol)
1.20 kristaps 230: fputc('\n', p->outfile);
231: if (NULL != p->chapters)
232: teximdocclose(p, 1);
1.1 kristaps 233:
234: /* Unmap all files. */
235: while (p->filepos > 0)
236: texifilepop(p);
237:
1.7 kristaps 238: for (i = 0; i < p->macrosz; i++)
239: teximacrofree(&p->macros[i]);
1.1 kristaps 240: for (i = 0; i < p->dirsz; i++)
241: free(p->dirs[i]);
1.4 kristaps 242: for (i = 0; i < p->indexsz; i++)
1.30 kristaps 243: texidex_free(&p->indexs[i]);
1.7 kristaps 244: for (i = 0; i < p->valsz; i++)
245: texivaluefree(&p->vals[i]);
1.4 kristaps 246:
1.32 ! kristaps 247: free(p->nodecache);
1.7 kristaps 248: free(p->macros);
1.1 kristaps 249: free(p->vals);
1.4 kristaps 250: free(p->indexs);
1.1 kristaps 251: free(p->dirs);
252: free(p->subtitle);
253: free(p->title);
1.26 kristaps 254: free(p->copying);
1.1 kristaps 255: }
256:
/*
 * Fatal error: report it, release everything via texiexit(), and
 * terminate the process with a failure status.
 * The "errstring" is passed as-is to perror(3) and may be NULL.
 * This function does not return.
 */
void
texiabort(struct texi *p, const char *errstring)
{
	perror(errstring);
	texiexit(p);
	exit(EXIT_FAILURE);
}
269:
270: /*
271: * Print a generic warning message (to stderr) tied to our current
272: * location in the parse sequence.
273: */
274: void
275: texiwarn(const struct texi *p, const char *fmt, ...)
276: {
1.15 kristaps 277: va_list ap;
278: const struct texifile *f;
279:
280: f = &p->files[p->filepos - 1];
281:
282: if (f->insplice)
283: fprintf(stderr, "%s:%zu:%zu (%zuB left in splice): "
284: "warning: ", f->name, f->line + 1,
285: f->col + 1, f->insplice);
286: else
287: fprintf(stderr, "%s:%zu:%zu: warning: ",
288: f->name, f->line + 1, f->col + 1);
1.1 kristaps 289:
290: va_start(ap, fmt);
291: vfprintf(stderr, fmt, ap);
292: va_end(ap);
293: fputc('\n', stderr);
294: }
295:
296: /*
297: * Print an error message (to stderr) tied to our current location in
298: * the parse sequence, invoke texiexit(), then die.
299: */
300: void
301: texierr(struct texi *p, const char *fmt, ...)
302: {
1.15 kristaps 303: va_list ap;
304: struct texifile *f;
305:
306: f = &p->files[p->filepos - 1];
307:
308: if (f->insplice)
309: fprintf(stderr, "%s:%zu:%zu: (%zuB left in splice): "
310: "error: ", f->name, f->line + 1,
311: f->col + 1, f->insplice);
312: else
313: fprintf(stderr, "%s:%zu:%zu: error: ",
314: f->name, f->line + 1, f->col + 1);
1.1 kristaps 315:
316: va_start(ap, fmt);
317: vfprintf(stderr, fmt, ap);
318: va_end(ap);
319: fputc('\n', stderr);
320: texiexit(p);
321: exit(EXIT_FAILURE);
322: }
323:
324: /*
325: * Put a single data character to the output if we're not ignoring.
1.13 kristaps 326: * Escape starting a line with a control character and slashes.
1.1 kristaps 327: */
328: void
329: texiputchar(struct texi *p, char c)
330: {
331:
332: if (p->ign)
333: return;
334: if ('.' == c && 0 == p->outcol)
1.20 kristaps 335: fputs("\\&", p->outfile);
1.10 kristaps 336: if ('\'' == c && 0 == p->outcol)
1.20 kristaps 337: fputs("\\&", p->outfile);
1.1 kristaps 338:
1.23 kristaps 339: if (p->uppercase)
340: fputc(toupper((unsigned int)c), p->outfile);
341: else
342: fputc(c, p->outfile);
1.13 kristaps 343: if ('\\' == c)
1.20 kristaps 344: fputc('e', p->outfile);
1.1 kristaps 345: if ('\n' == c) {
346: p->outcol = 0;
347: p->seenws = 0;
348: } else
349: p->outcol++;
350: }
351:
352: /*
1.13 kristaps 353: * Put an opaque series of characters.
354: * Characters starting a line with a control character are escaped, but
355: * that's it, so don't use this for non-controlled sequences of text.
1.1 kristaps 356: */
357: void
358: texiputchars(struct texi *p, const char *s)
359: {
360:
1.13 kristaps 361: if (p->ign)
362: return;
363: if ('.' == *s && 0 == p->outcol)
1.20 kristaps 364: fputs("\\&", p->outfile);
1.13 kristaps 365: if ('\'' == *s && 0 == p->outcol)
1.20 kristaps 366: fputs("\\&", p->outfile);
1.23 kristaps 367: if (p->uppercase)
368: for ( ; '\0' != *s; s++)
369: p->outcol += fputc(toupper
370: ((unsigned int)*s), p->outfile);
371: else
372: p->outcol += fputs(s, p->outfile);
1.9 kristaps 373: }
374:
375: /*
376: * This puts all characters onto the output stream but makes sure to
377: * escape mdoc(7) slashes.
1.14 kristaps 378: * FIXME: useless.
1.9 kristaps 379: */
380: void
1.14 kristaps 381: texiputbuf(struct texi *p, size_t start, size_t end)
1.9 kristaps 382: {
383:
1.14 kristaps 384: for ( ; start < end; start++)
385: texiputchar(p, BUF(p)[start]);
1.1 kristaps 386: }
387:
388: /*
389: * Close an mdoc(7) macro opened with teximacroopen().
390: * If there are no more macros on the line, prints a newline.
391: */
392: void
393: teximacroclose(struct texi *p)
394: {
395:
1.30 kristaps 396: if (p->ign || p->literal|| TEXILIST_TABLE == p->list)
1.1 kristaps 397: return;
398:
399: if (0 == --p->outmacro) {
1.20 kristaps 400: fputc('\n', p->outfile);
1.1 kristaps 401: p->outcol = p->seenws = 0;
402: }
403: }
404:
405: /*
406: * Open a mdoc(7) macro.
407: * This is used for line macros, e.g., Qq [foo bar baz].
408: * It can be invoked for nested macros, e.g., Qq Li foo .
409: * TODO: flush-right punctuation (e.g., parenthesis).
410: */
411: void
412: teximacroopen(struct texi *p, const char *s)
413: {
414: int rc;
415:
1.30 kristaps 416: if (p->ign || p->literal|| TEXILIST_TABLE == p->list)
1.1 kristaps 417: return;
418:
419: if (p->outcol && 0 == p->outmacro) {
1.20 kristaps 420: fputc('\n', p->outfile);
1.1 kristaps 421: p->outcol = 0;
422: }
423:
1.30 kristaps 424: if (p->seenvs > 0 && 0 == p->outmacro)
425: fputs(".Pp\n", p->outfile);
426:
1.1 kristaps 427: if (0 == p->outmacro)
1.20 kristaps 428: fputc('.', p->outfile);
1.1 kristaps 429: else
1.20 kristaps 430: fputc(' ', p->outfile);
1.1 kristaps 431:
1.20 kristaps 432: if (EOF != (rc = fputs(s, p->outfile)))
1.1 kristaps 433: p->outcol += rc;
434:
1.20 kristaps 435: fputc(' ', p->outfile);
1.1 kristaps 436: p->outcol++;
437: p->outmacro++;
1.30 kristaps 438: p->seenws = p->seenvs = 0;
1.1 kristaps 439: }
440:
441: /*
442: * Put a stadnalone mdoc(7) command with the trailing newline.
443: */
444: void
445: teximacro(struct texi *p, const char *s)
446: {
447:
448: if (p->ign)
449: return;
450:
451: if (p->outmacro)
452: texierr(p, "\"%s\" in open line scope!?", s);
453: if (p->literal)
454: texierr(p, "\"%s\" in a literal scope!?", s);
455: if (p->outcol)
1.20 kristaps 456: fputc('\n', p->outfile);
1.30 kristaps 457: if (p->seenvs > 0)
458: fputs(".Pp\n", p->outfile);
1.1 kristaps 459:
1.20 kristaps 460: fputc('.', p->outfile);
461: fputs(s, p->outfile);
462: fputc('\n', p->outfile);
1.1 kristaps 463: p->outcol = p->seenws = 0;
464: }
465:
466: /*
467: * Introduce vertical space during normal (non-macro) input.
468: */
469: void
470: texivspace(struct texi *p)
471: {
472:
1.30 kristaps 473: if (TEXILIST_TABLE != p->list && p->seenvs >= 0)
474: p->seenvs = 1;
1.1 kristaps 475: }
476:
477: /*
478: * Advance by a single byte in the input stream, adjusting our location
479: * in the current input file.
480: */
481: void
1.14 kristaps 482: advance(struct texi *p, size_t *pos)
1.1 kristaps 483: {
1.15 kristaps 484: struct texifile *f;
1.1 kristaps 485:
1.15 kristaps 486: f = &p->files[p->filepos - 1];
487:
488: if (0 == f->insplice) {
489: if ('\n' == BUF(p)[*pos]) {
490: f->line++;
491: f->col = 0;
492: } else
493: f->col++;
1.17 kristaps 494: } else {
1.15 kristaps 495: --f->insplice;
1.17 kristaps 496: if (0 == f->insplice)
497: f->depth = 0;
498: }
1.1 kristaps 499:
500: (*pos)++;
501: }
502:
503: /*
504: * It's common to wait punctuation to float on the right side of macro
505: * lines in mdoc(7), e.g., ".Em hello ) ."
506: * This function does so, and should be called before teximacroclose().
507: * It will detect that it's the last in the nested macros and
508: * appropriately flush-left punctuation alongside the macro.
509: */
510: void
1.14 kristaps 511: texipunctuate(struct texi *p, size_t *pos)
1.1 kristaps 512: {
513: size_t start, end;
514:
515: if (1 != p->outmacro)
516: return;
517:
1.14 kristaps 518: for (start = end = *pos; end < BUFSZ(p); end++) {
519: switch (BUF(p)[end]) {
1.1 kristaps 520: case (','):
521: case (')'):
522: case ('.'):
523: case ('"'):
524: case (':'):
1.22 kristaps 525: case (';'):
1.1 kristaps 526: case ('!'):
527: case ('?'):
528: continue;
529: default:
530: break;
531: }
532: break;
533: }
534: if (end == *pos)
535: return;
1.14 kristaps 536: if (end + 1 == BUFSZ(p) || ' ' == BUF(p)[end] ||
1.30 kristaps 537: '@' == BUF(p)[end] || '\n' == BUF(p)[end]) {
1.1 kristaps 538: for ( ; start < end; start++) {
539: texiputchar(p, ' ');
1.14 kristaps 540: texiputchar(p, BUF(p)[start]);
541: advance(p, pos);
1.1 kristaps 542: }
543: }
544: }
545:
546: /*
547: * Advance to the next non-whitespace word in the input stream.
548: * If we're in literal mode, then print all of the whitespace as we're
549: * doing so.
550: */
551: static size_t
1.14 kristaps 552: advancenext(struct texi *p, size_t *pos)
1.1 kristaps 553: {
554:
555: if (p->literal) {
1.14 kristaps 556: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos])) {
557: texiputchar(p, BUF(p)[*pos]);
558: advance(p, pos);
1.1 kristaps 559: }
560: return(*pos);
561: }
562:
1.14 kristaps 563: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos])) {
1.1 kristaps 564: p->seenws = 1;
1.30 kristaps 565: if (0 == p->seenvs && '\n' == BUF(p)[*pos])
566: if (*pos + 1 < BUFSZ(p) && '\n' == BUF(p)[*pos + 1])
567: p->seenvs = 1;
1.14 kristaps 568: advance(p, pos);
1.1 kristaps 569: }
570: return(*pos);
571: }
572:
573: /*
574: * Advance to the EOLN in the input stream.
1.22 kristaps 575: * This will skip over '@' markers in an effort to ignore escaped
576: * newlines.
1.1 kristaps 577: */
578: size_t
1.14 kristaps 579: advanceeoln(struct texi *p, size_t *pos, int consumenl)
1.1 kristaps 580: {
581:
1.22 kristaps 582: while (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos]) {
583: if ('@' == BUF(p)[*pos])
584: advance(p, pos);
1.14 kristaps 585: advance(p, pos);
1.22 kristaps 586: }
1.14 kristaps 587: if (*pos < BUFSZ(p) && consumenl)
588: advance(p, pos);
1.1 kristaps 589: return(*pos);
590: }
591:
/*
 * Advance byte-by-byte to position "end", which is an absolute
 * position in the current buffer greater than or equal to the current
 * position.
 */
void
advanceto(struct texi *p, size_t *pos, size_t end)
{
	assert(end >= *pos);

	for ( ; end != *pos; )
		advance(p, pos);
}
604:
1.7 kristaps 605: static void
1.17 kristaps 606: texiexecmacro(struct texi *p, struct teximacro *m, size_t sv, size_t *pos)
1.7 kristaps 607: {
1.11 kristaps 608: size_t valsz, realsz, aasz, asz,
609: ssz, i, j, k, start, end;
610: char *val;
611: char **args;
612: const char *cp;
1.7 kristaps 613:
1.17 kristaps 614: /* Disregard empty macros. */
1.22 kristaps 615: if (0 == (valsz = realsz = strlen(m->value))) {
616: args = argparse(p, pos, &asz, m->argsz);
617: for (i = 0; i < asz; i++)
618: free(args[i]);
619: free(args);
1.17 kristaps 620: return;
1.22 kristaps 621: }
1.17 kristaps 622:
623: /*
624: * This is important: it protect us from macros that invoke more
625: * macros, possibly going on infinitely.
626: * We use "sv" instead of the current position because we might
627: * be invoked at the end of the macro (i.e., insplice == 0).
628: * The "sv" value was initialised at the start of the macro.
629: */
630: if (sv > 0)
1.24 kristaps 631: if (++p->files[p->filepos - 1].depth > 64)
1.17 kristaps 632: texierr(p, "maximium recursive depth");
633:
1.14 kristaps 634: args = argparse(p, pos, &asz, m->argsz);
1.7 kristaps 635: if (asz != m->argsz)
636: texiwarn(p, "invalid macro argument length");
637: aasz = asz < m->argsz ? asz : m->argsz;
638:
639: if (0 == aasz) {
1.21 kristaps 640: texisplice(p, m->value, valsz, *pos);
1.7 kristaps 641: return;
642: }
643:
644: val = strdup(m->value);
645:
646: for (i = j = 0; i < realsz; i++) {
647: /* Parse blindly til the backslash delimiter. */
648: if ('\\' != m->value[i]) {
649: val[j++] = m->value[i];
650: val[j] = '\0';
651: continue;
652: } else if (i == realsz - 1)
653: texierr(p, "trailing argument name delimiter");
654:
655: /* Double-backslash is escaped. */
656: if ('\\' == m->value[i + 1]) {
657: val[j++] = m->value[i++];
658: val[j] = '\0';
659: continue;
660: }
661:
662: assert('\\' == m->value[i] && i < realsz - 1);
663:
664: /* Parse to terminating delimiter. */
665: /* FIXME: embedded, escaped delimiters? */
666: for (start = end = i + 1; end < realsz; end++)
667: if ('\\' == m->value[end])
668: break;
669: if (end == realsz)
670: texierr(p, "unterminated argument name");
671:
672: for (k = 0; k < aasz; k++) {
673: if ((ssz = strlen(m->args[k])) != (end - start))
674: continue;
675: if (strncmp(&m->value[start], m->args[k], ssz))
676: continue;
677: break;
678: }
679:
680: /*
681: * Argument didn't exist in argument table.
1.14 kristaps 682: * Just ignore it.
1.7 kristaps 683: */
684: if (k == aasz) {
1.14 kristaps 685: i = end;
1.7 kristaps 686: continue;
687: }
688:
689: if (strlen(args[k]) > ssz) {
690: valsz += strlen(args[k]);
691: val = realloc(val, valsz + 1);
692: if (NULL == val)
693: texiabort(p, NULL);
694: }
695:
1.11 kristaps 696: for (cp = args[k]; '\0' != *cp; cp++)
697: val[j++] = *cp;
698:
699: val[j] = '\0';
1.7 kristaps 700: i = end;
701: }
702:
1.21 kristaps 703: texisplice(p, val, strlen(val), *pos);
1.7 kristaps 704:
705: for (i = 0; i < asz; i++)
706: free(args[i]);
707: free(args);
708: free(val);
709: }
710:
1.1 kristaps 711: /*
712: * Output a free-form word in the input stream, progressing to the next
713: * command or white-space.
714: * This also will advance the input stream.
715: */
716: static void
1.14 kristaps 717: parseword(struct texi *p, size_t *pos, char extra)
1.1 kristaps 718: {
1.29 kristaps 719: size_t i, end, len;
720: int c;
1.1 kristaps 721:
1.25 kristaps 722: /*
1.27 kristaps 723: * If a prior word had a terminating double-newline, then begin
724: * this text block with a `Pp'.
725: * We don't do this if we're in a literal context (we'll print
726: * out the newlines themselves) nor in a `TS' table.
727: */
1.30 kristaps 728: if (p->seenvs > 0 && 0 == p->literal && TEXILIST_TABLE != p->list) {
729: if (p->outcol > 0)
730: fputc('\n', p->outfile);
731: fputs(".Pp\n", p->outfile);
732: p->outcol = 0;
733: }
1.27 kristaps 734:
735: /*
1.25 kristaps 736: * Some line control: if we (non-macro, non-literal) already
737: * have more than 72 characters written to the screen, then
738: * output a newline before getting started.
739: */
1.1 kristaps 740: if (p->seenws && 0 == p->outmacro &&
741: p->outcol > 72 && 0 == p->literal)
742: texiputchar(p, '\n');
1.25 kristaps 743:
744: /* Usual padding in the case of seen whitespace. */
1.1 kristaps 745: if (p->seenws && p->outcol && 0 == p->literal)
746: texiputchar(p, ' ');
747:
748: p->seenws = 0;
1.29 kristaps 749:
750: /*
751: * If we're in a macro line, we might want to print text that
752: * happens to be the same as an mdoc(7) macro.
753: * Obviously, we need to escape these words.
754: */
755: if (p->outmacro) {
756: end = *pos;
757: /* Read ahead to get the word length. */
758: while (end < BUFSZ(p) && ! ismspace(BUF(p)[end])) {
759: switch ((c = BUF(p)[end])) {
760: case ('@'):
761: case ('}'):
762: case ('{'):
763: break;
764: default:
765: if ('\0' != extra && extra == c)
766: break;
767: end++;
768: continue;
769: }
770: break;
771: }
772: len = end - *pos;
773: /* See if we have a match. */
774: for (i = 0; NULL != mdocs[i]; i++) {
775: /* All macros are 2 or three letters. */
776: if (len < 2 || len > 3)
777: continue;
778: /* Check the macro word length. */
779: if ('\0' == mdocs[i][2] && 2 != len)
780: continue;
781: else if ('\0' == mdocs[i][3] && 3 != len)
782: continue;
783: if (strncmp(mdocs[i], &BUF(p)[*pos], len))
784: continue;
785: texiputchars(p, "\\&");
786: break;
787: }
788: }
1.1 kristaps 789:
1.14 kristaps 790: while (*pos < BUFSZ(p) && ! ismspace(BUF(p)[*pos])) {
791: switch (BUF(p)[*pos]) {
1.1 kristaps 792: case ('@'):
793: case ('}'):
794: case ('{'):
795: return;
796: }
1.14 kristaps 797: if ('\0' != extra && BUF(p)[*pos] == extra)
1.1 kristaps 798: return;
1.28 kristaps 799:
800: if (p->literal) {
801: texiputchar(p, BUF(p)[*pos]);
802: advance(p, pos);
803: continue;
804: }
805:
1.30 kristaps 806: if ('"' == BUF(p)[*pos]) {
807: texiputchars(p, "\\(dq");
808: } else if (*pos < BUFSZ(p) - 2 &&
1.28 kristaps 809: '-' == BUF(p)[*pos] &&
810: '-' == BUF(p)[*pos + 1] &&
811: '-' == BUF(p)[*pos + 2]) {
812: texiputchars(p, "\\(em");
813: advance(p, pos);
814: advance(p, pos);
815: } else if (*pos < BUFSZ(p) - 1 &&
816: '-' == BUF(p)[*pos] &&
817: '-' == BUF(p)[*pos + 1]) {
818: texiputchars(p, "\\(en");
819: advance(p, pos);
820: } else if (*pos < BUFSZ(p) - 1 &&
1.14 kristaps 821: '`' == BUF(p)[*pos] &&
822: '`' == BUF(p)[*pos + 1]) {
1.1 kristaps 823: texiputchars(p, "\\(lq");
1.14 kristaps 824: advance(p, pos);
825: } else if (*pos < BUFSZ(p) - 1 &&
826: '\'' == BUF(p)[*pos] &&
827: '\'' == BUF(p)[*pos + 1]) {
1.1 kristaps 828: texiputchars(p, "\\(rq");
1.14 kristaps 829: advance(p, pos);
1.1 kristaps 830: } else
1.14 kristaps 831: texiputchar(p, BUF(p)[*pos]);
1.28 kristaps 832:
1.14 kristaps 833: advance(p, pos);
1.1 kristaps 834: }
1.25 kristaps 835:
836: /*
837: * New sentence, new line:if we (non-macro, non-literal) see a
838: * period at the end of the last printed word, then open a
839: * newline.
840: */
1.30 kristaps 841: if (0 == p->literal && 0 == p->outmacro && *pos < BUFSZ(p))
842: switch (BUF(p)[*pos - 1]) {
843: case ('.'):
844: case ('!'):
845: case ('?'):
846: texiputchar(p, '\n');
847: break;
848: default:
849: break;
850: }
851:
852: p->seenvs = 0;
1.1 kristaps 853: }
854:
855: /*
856: * Look up the command at position "pos" in the buffer, returning it (or
857: * TEXICMD__MAX if none found) and setting "end" to be the absolute
858: * index after the command name.
859: */
860: enum texicmd
1.19 kristaps 861: texicmd(const struct texi *p, size_t pos, size_t *end, struct teximacro **macro)
1.1 kristaps 862: {
1.4 kristaps 863: size_t i, len, toksz;
1.1 kristaps 864:
1.14 kristaps 865: assert('@' == BUF(p)[pos]);
1.1 kristaps 866:
1.7 kristaps 867: if (NULL != macro)
868: *macro = NULL;
869:
1.14 kristaps 870: if ((*end = pos) == BUFSZ(p))
1.1 kristaps 871: return(TEXICMD__MAX);
1.14 kristaps 872: else if ((*end = ++pos) == BUFSZ(p))
1.1 kristaps 873: return(TEXICMD__MAX);
874:
875: /* Alphabetic commands are special. */
1.23 kristaps 876: if ( ! isalpha((unsigned int)BUF(p)[pos])) {
1.14 kristaps 877: if ((*end = pos + 1) == BUFSZ(p))
1.1 kristaps 878: return(TEXICMD__MAX);
879: for (i = 0; i < TEXICMD__MAX; i++) {
880: if (1 != texitoks[i].len)
881: continue;
1.14 kristaps 882: if (0 == strncmp(texitoks[i].tok, &BUF(p)[pos], 1))
1.1 kristaps 883: return(i);
884: }
1.14 kristaps 885: texiwarn(p, "bad command: @%c", BUF(p)[pos]);
1.1 kristaps 886: return(TEXICMD__MAX);
887: }
888:
1.4 kristaps 889: /* Scan to the end of the possible command name. */
1.14 kristaps 890: for (*end = pos; *end < BUFSZ(p) && ! ismspace(BUF(p)[*end]); (*end)++)
891: if ((*end > pos && ('@' == BUF(p)[*end] ||
892: '{' == BUF(p)[*end] || '}' == BUF(p)[*end])))
1.1 kristaps 893: break;
894:
1.4 kristaps 895: /* Look for the command. */
1.1 kristaps 896: len = *end - pos;
897: for (i = 0; i < TEXICMD__MAX; i++) {
898: if (len != texitoks[i].len)
899: continue;
1.14 kristaps 900: if (0 == strncmp(texitoks[i].tok, &BUF(p)[pos], len))
1.1 kristaps 901: return(i);
902: }
903:
1.4 kristaps 904: /* Look for it in our indices. */
905: for (i = 0; i < p->indexsz; i++) {
1.30 kristaps 906: toksz = strlen(p->indexs[i].name);
1.4 kristaps 907: if (len != 5 + toksz)
908: continue;
1.30 kristaps 909: if (strncmp(&BUF(p)[pos], p->indexs[i].name, toksz))
1.4 kristaps 910: continue;
1.14 kristaps 911: if (0 == strncmp(&BUF(p)[pos + toksz], "index", 5))
1.7 kristaps 912: return(TEXICMD_USER_INDEX);
913: }
914:
915: for (i = 0; i < p->macrosz; i++) {
916: if (len != strlen(p->macros[i].key))
917: continue;
1.14 kristaps 918: if (strncmp(&BUF(p)[pos], p->macros[i].key, len))
1.7 kristaps 919: continue;
920: if (NULL != macro)
921: *macro = &p->macros[i];
922: return(TEXICMD__MAX);
1.4 kristaps 923: }
924:
1.14 kristaps 925: texiwarn(p, "bad command: @%.*s", (int)len, &BUF(p)[pos]);
1.1 kristaps 926: return(TEXICMD__MAX);
927: }
928:
929: /*
930: * Parse an argument from a bracketed command, e.g., @url{foo, baz}.
931: * Num should be set to the argument we're currently parsing, although
932: * it suffixes for it to be zero or non-zero.
933: * This will return 1 if there are more arguments, 0 otherwise.
934: * This will stop (returning 0) in the event of EOF or if we're not at a
935: * bracket for the zeroth parse.
936: */
937: int
1.14 kristaps 938: parsearg(struct texi *p, size_t *pos, size_t num)
1.1 kristaps 939: {
1.17 kristaps 940: size_t end, sv;
1.7 kristaps 941: enum texicmd cmd;
942: struct teximacro *macro;
1.1 kristaps 943:
1.14 kristaps 944: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos]))
945: advance(p, pos);
946: if (*pos == BUFSZ(p) || (0 == num && '{' != BUF(p)[*pos]))
1.1 kristaps 947: return(0);
948: if (0 == num)
1.14 kristaps 949: advance(p, pos);
1.1 kristaps 950:
1.14 kristaps 951: while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
952: switch (BUF(p)[*pos]) {
1.1 kristaps 953: case (','):
1.14 kristaps 954: advance(p, pos);
1.1 kristaps 955: return(1);
956: case ('}'):
1.14 kristaps 957: advance(p, pos);
1.1 kristaps 958: return(0);
959: case ('{'):
960: if (0 == p->ign)
961: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 962: advance(p, pos);
1.1 kristaps 963: continue;
964: case ('@'):
965: break;
966: default:
1.14 kristaps 967: parseword(p, pos, ',');
1.1 kristaps 968: continue;
969: }
970:
1.17 kristaps 971: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 972: cmd = texicmd(p, *pos, &end, ¯o);
973: advanceto(p, pos, end);
1.7 kristaps 974: if (NULL != macro)
1.17 kristaps 975: texiexecmacro(p, macro, sv, pos);
1.1 kristaps 976: if (TEXICMD__MAX == cmd)
977: continue;
978: if (NULL != texitoks[cmd].fp)
1.14 kristaps 979: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 980: }
981: return(0);
982: }
983:
984: /*
985: * Parse until the end of a bracketed statement, e.g., @foo{bar baz}.
986: * This will stop in the event of EOF or if we're not at a bracket.
987: */
988: void
1.18 kristaps 989: parsebracket(struct texi *p, size_t *pos, int dostack)
1.1 kristaps 990: {
1.18 kristaps 991: size_t end, sv, stack;
1.7 kristaps 992: enum texicmd cmd;
993: struct teximacro *macro;
1.1 kristaps 994:
1.14 kristaps 995: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos]))
996: advance(p, pos);
1.1 kristaps 997:
1.14 kristaps 998: if (*pos == BUFSZ(p) || '{' != BUF(p)[*pos])
1.1 kristaps 999: return;
1.14 kristaps 1000: advance(p, pos);
1.1 kristaps 1001:
1.18 kristaps 1002: stack = 0;
1.14 kristaps 1003: while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
1004: switch (BUF(p)[*pos]) {
1.1 kristaps 1005: case ('}'):
1.18 kristaps 1006: if (stack > 0) {
1007: stack--;
1008: advance(p, pos);
1009: texiputchar(p, '}');
1010: continue;
1011: }
1.14 kristaps 1012: advance(p, pos);
1.1 kristaps 1013: return;
1014: case ('{'):
1.18 kristaps 1015: if (dostack) {
1016: stack++;
1017: advance(p, pos);
1018: texiputchar(p, '{');
1019: continue;
1020: }
1.1 kristaps 1021: if (0 == p->ign)
1022: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1023: advance(p, pos);
1.1 kristaps 1024: continue;
1025: case ('@'):
1026: break;
1027: default:
1.14 kristaps 1028: parseword(p, pos, '\0');
1.1 kristaps 1029: continue;
1030: }
1031:
1.17 kristaps 1032: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1033: cmd = texicmd(p, *pos, &end, ¯o);
1034: advanceto(p, pos, end);
1.7 kristaps 1035: if (NULL != macro)
1.17 kristaps 1036: texiexecmacro(p, macro, sv, pos);
1.1 kristaps 1037: if (TEXICMD__MAX == cmd)
1038: continue;
1039: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1040: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1041: }
1042: }
1043:
1044: /*
1045: * This should be invoked when we're on a macro line and want to process
1046: * to the end of the current input line, doing all of our macros along
1047: * the way.
1048: */
1049: void
1.14 kristaps 1050: parseeoln(struct texi *p, size_t *pos)
1.1 kristaps 1051: {
1.17 kristaps 1052: size_t end, sv;
1.7 kristaps 1053: enum texicmd cmd;
1054: struct teximacro *macro;
1.1 kristaps 1055:
1.14 kristaps 1056: while (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos]) {
1057: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) {
1.1 kristaps 1058: p->seenws = 1;
1059: if (p->literal)
1.14 kristaps 1060: texiputchar(p, BUF(p)[*pos]);
1061: advance(p, pos);
1.1 kristaps 1062: }
1.14 kristaps 1063: switch (BUF(p)[*pos]) {
1.1 kristaps 1064: case ('}'):
1065: if (0 == p->ign)
1066: texiwarn(p, "unexpected \"}\"");
1.14 kristaps 1067: advance(p, pos);
1.1 kristaps 1068: continue;
1069: case ('{'):
1070: if (0 == p->ign)
1071: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1072: advance(p, pos);
1.1 kristaps 1073: continue;
1.30 kristaps 1074: case ('\n'):
1075: continue;
1.1 kristaps 1076: case ('@'):
1077: break;
1078: default:
1.14 kristaps 1079: parseword(p, pos, '\0');
1.1 kristaps 1080: continue;
1081: }
1082:
1.17 kristaps 1083: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1084: cmd = texicmd(p, *pos, &end, ¯o);
1085: advanceto(p, pos, end);
1.7 kristaps 1086: if (NULL != macro)
1.17 kristaps 1087: texiexecmacro(p, macro, sv, pos);
1.1 kristaps 1088: if (TEXICMD__MAX == cmd)
1089: continue;
1090: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1091: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1092: }
1.14 kristaps 1093:
1094: if (*pos < BUFSZ(p) && '\n' == BUF(p)[*pos])
1095: advance(p, pos);
1.19 kristaps 1096: }
1097:
1.30 kristaps 1098: enum texicmd
1099: peeklinecmd(const struct texi *p, size_t pos)
1100: {
1101: size_t end;
1102:
1103: while (pos < BUFSZ(p) && isws(BUF(p)[pos]))
1104: pos++;
1105: if (pos == BUFSZ(p) || '@' != BUF(p)[pos])
1106: return(TEXICMD__MAX);
1107: return(texicmd(p, pos, &end, NULL));
1108: }
1109:
1.19 kristaps 1110: /*
1111: * Peek to see if there's a command after subsequent whitespace.
1112: * If so, return the macro identifier.
1113: * This DOES NOT work with user-defined macros.
1114: */
1115: enum texicmd
1116: peekcmd(const struct texi *p, size_t pos)
1117: {
1118: size_t end;
1119:
1120: while (pos < BUFSZ(p) && ismspace(BUF(p)[pos]))
1121: pos++;
1122: if (pos == BUFSZ(p) || '@' != BUF(p)[pos])
1123: return(TEXICMD__MAX);
1124: return(texicmd(p, pos, &end, NULL));
1.1 kristaps 1125: }
1126:
1127: /*
1128: * Parse a single word or command.
1129: * This will return immediately at the EOF.
1130: */
1.32 ! kristaps 1131: void
1.14 kristaps 1132: parsesingle(struct texi *p, size_t *pos)
1.1 kristaps 1133: {
1.17 kristaps 1134: size_t end, sv;
1.7 kristaps 1135: enum texicmd cmd;
1136: struct teximacro *macro;
1.1 kristaps 1137:
1.14 kristaps 1138: if ((*pos = advancenext(p, pos)) >= BUFSZ(p))
1.1 kristaps 1139: return;
1140:
1.14 kristaps 1141: switch (BUF(p)[*pos]) {
1.1 kristaps 1142: case ('}'):
1143: if (0 == p->ign)
1144: texiwarn(p, "unexpected \"}\"");
1.14 kristaps 1145: advance(p, pos);
1.1 kristaps 1146: return;
1147: case ('{'):
1148: if (0 == p->ign)
1149: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1150: advance(p, pos);
1.1 kristaps 1151: return;
1152: case ('@'):
1153: break;
1154: default:
1.14 kristaps 1155: parseword(p, pos, '\0');
1.1 kristaps 1156: return;
1157: }
1158:
1.17 kristaps 1159: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1160: cmd = texicmd(p, *pos, &end, ¯o);
1161: advanceto(p, pos, end);
1.7 kristaps 1162: if (NULL != macro)
1.17 kristaps 1163: texiexecmacro(p, macro, sv, pos);
1.1 kristaps 1164: if (TEXICMD__MAX == cmd)
1165: return;
1166: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1167: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1168: }
1169:
1170: /*
1171: * This is used in the @deffn type of command.
1172: * These have an arbitrary number of line arguments; however, these
1173: * arguments may or may not be surrounded by brackets.
1174: * In this function, we parse each one as either a bracketed or
1175: * non-bracketed argument, returning 0 when we've reached the end of
1176: * line or 1 otherwise.
1177: */
1178: int
1.14 kristaps 1179: parselinearg(struct texi *p, size_t *pos)
1.1 kristaps 1180: {
1181:
1.14 kristaps 1182: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) {
1.1 kristaps 1183: p->seenws = 1;
1.14 kristaps 1184: advance(p, pos);
1.1 kristaps 1185: }
1186:
1.14 kristaps 1187: if (*pos < BUFSZ(p) && '{' == BUF(p)[*pos])
1.18 kristaps 1188: parsebracket(p, pos, 0);
1.14 kristaps 1189: else if (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos])
1190: parsesingle(p, pos);
1.1 kristaps 1191: else
1192: return(0);
1193:
1194: return(1);
1195: }
1196:
/*
 * Parse the current buffer from its first byte until its end, one
 * parsesingle() unit at a time.
 */
static void
parseeof(struct texi *p)
{
	size_t	 cur = 0;

	while (cur < BUFSZ(p))
		parsesingle(p, &cur);
}
1208:
1.8 kristaps 1209: void
1.21 kristaps 1210: texisplice(struct texi *p, const char *buf, size_t sz, size_t pos)
1.8 kristaps 1211: {
1.14 kristaps 1212: char *cp;
1213: struct texifile *f;
1.8 kristaps 1214:
1.14 kristaps 1215: assert(p->filepos > 0);
1216: f = &p->files[p->filepos - 1];
1.8 kristaps 1217:
1.14 kristaps 1218: if (f->mapsz + sz > f->mapmaxsz) {
1219: f->mapmaxsz = f->mapsz + sz + 1024;
1220: cp = realloc(f->map, f->mapmaxsz);
1221: if (NULL == cp)
1222: texiabort(p, NULL);
1223: f->map = cp;
1224: }
1.8 kristaps 1225:
1.15 kristaps 1226: f->insplice += sz;
1.21 kristaps 1227: memmove(f->map + pos + sz, f->map + pos, f->mapsz - pos);
1228: memcpy(f->map + pos, buf, sz);
1.14 kristaps 1229: f->mapsz += sz;
1.8 kristaps 1230: }
1231:
1232: /*
1.1 kristaps 1233: * Parse a block sequence until we have the "@end endtoken" command
1234: * invocation.
1235: * This will return immediately at EOF.
1236: */
1237: void
1.14 kristaps 1238: parseto(struct texi *p, size_t *pos, const char *endtoken)
1.1 kristaps 1239: {
1.17 kristaps 1240: size_t end, sv;
1.7 kristaps 1241: enum texicmd cmd;
1242: size_t endtoksz;
1243: struct teximacro *macro;
1.1 kristaps 1244:
1245: endtoksz = strlen(endtoken);
1246: assert(endtoksz > 0);
1247:
1.14 kristaps 1248: while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
1249: switch (BUF(p)[*pos]) {
1.1 kristaps 1250: case ('}'):
1251: if (0 == p->ign)
1252: texiwarn(p, "unexpected \"}\"");
1.14 kristaps 1253: advance(p, pos);
1.1 kristaps 1254: continue;
1255: case ('{'):
1256: if (0 == p->ign)
1257: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1258: advance(p, pos);
1.1 kristaps 1259: continue;
1260: case ('@'):
1261: break;
1262: default:
1.14 kristaps 1263: parseword(p, pos, '\0');
1.1 kristaps 1264: continue;
1265: }
1266:
1.17 kristaps 1267: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1268: cmd = texicmd(p, *pos, &end, ¯o);
1269: advanceto(p, pos, end);
1.1 kristaps 1270: if (TEXICMD_END == cmd) {
1.14 kristaps 1271: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1272: advance(p, pos);
1.1 kristaps 1273: /*
1274: * FIXME: check the full word, not just its
1275: * initial substring!
1276: */
1.14 kristaps 1277: if (BUFSZ(p) - *pos >= endtoksz && 0 == strncmp
1278: (&BUF(p)[*pos], endtoken, endtoksz)) {
1279: advanceeoln(p, pos, 0);
1.1 kristaps 1280: break;
1281: }
1282: if (0 == p->ign)
1283: texiwarn(p, "unexpected \"end\"");
1.14 kristaps 1284: advanceeoln(p, pos, 0);
1.1 kristaps 1285: continue;
1.7 kristaps 1286: }
1287: if (NULL != macro)
1.17 kristaps 1288: texiexecmacro(p, macro, sv, pos);
1.7 kristaps 1289: if (TEXICMD__MAX == cmd)
1290: continue;
1291: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1292: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1293: }
1.30 kristaps 1294:
1295: if (*pos == BUFSZ(p))
1296: texiwarn(p, "EOF expecting \"%s\" end\n", endtoken);
1.1 kristaps 1297: }
1298:
1299: /*
1.12 kristaps 1300: * Like parsefile() but used for reading from stdandard input.
1301: * This can only be called for the first file!
1302: */
1303: void
1304: parsestdin(struct texi *p)
1305: {
1306: struct texifile *f;
1307: ssize_t ssz;
1308:
1309: assert(0 == p->filepos);
1310: f = &p->files[p->filepos];
1311: memset(f, 0, sizeof(struct texifile));
1312:
1313: f->type = TEXISRC_STDIN;
1314: f->name = "<stdin>";
1315:
1.14 kristaps 1316: for (f->mapsz = 0; ; f->mapsz += (size_t)ssz) {
1317: if (f->mapsz == f->mapmaxsz) {
1318: if (f->mapmaxsz == (1U << 31))
1.12 kristaps 1319: texierr(p, "stdin buffer too long");
1.14 kristaps 1320: f->mapmaxsz = f->mapmaxsz > 65536 / 2 ?
1321: 2 * f->mapmaxsz : 65536;
1322: f->map = realloc(f->map, f->mapmaxsz);
1.12 kristaps 1323: if (NULL == f->map)
1324: texiabort(p, NULL);
1325: }
1.14 kristaps 1326: ssz = read(STDIN_FILENO, f->map +
1327: (int)f->mapsz, f->mapmaxsz - f->mapsz);
1.12 kristaps 1328: if (0 == ssz)
1329: break;
1330: else if (-1 == ssz)
1331: texiabort(p, NULL);
1332: }
1333:
1334: p->filepos++;
1.14 kristaps 1335: parseeof(p);
1.12 kristaps 1336: texifilepop(p);
1337: }
1338:
1339: /*
1.1 kristaps 1340: * Memory-map the file "fname" and begin parsing it unless "parse" is
1341: * zero, in which case we just dump the file to stdout (making sure it
1342: * doesn't trip up mdoc(7) along the way).
1343: * This can be called in a nested context.
1344: */
1345: void
1346: parsefile(struct texi *p, const char *fname, int parse)
1347: {
1348: struct texifile *f;
1349: int fd;
1350: struct stat st;
1351: size_t i;
1.14 kristaps 1352: char *map;
1.1 kristaps 1353:
1.5 kristaps 1354: if (64 == p->filepos)
1.6 kristaps 1355: texierr(p, "too many open files");
1.1 kristaps 1356: f = &p->files[p->filepos];
1357: memset(f, 0, sizeof(struct texifile));
1358:
1.12 kristaps 1359: f->type = TEXISRC_FILE;
1.1 kristaps 1360: f->name = fname;
1361: if (-1 == (fd = open(fname, O_RDONLY, 0))) {
1362: texiabort(p, fname);
1363: } else if (-1 == fstat(fd, &st)) {
1364: close(fd);
1365: texiabort(p, fname);
1366: }
1367:
1.14 kristaps 1368: f->mapsz = f->mapmaxsz = st.st_size;
1369: map = mmap(NULL, f->mapsz,
1.1 kristaps 1370: PROT_READ, MAP_SHARED, fd, 0);
1371: close(fd);
1372:
1.14 kristaps 1373: if (MAP_FAILED == map)
1.1 kristaps 1374: texiabort(p, fname);
1375:
1376: if ( ! parse) {
1.13 kristaps 1377: for (i = 0; i < f->mapsz; i++)
1.14 kristaps 1378: texiputchar(p, map[i]);
1.13 kristaps 1379: if (p->outcol)
1380: texiputchar(p, '\n');
1.14 kristaps 1381: munmap(map, f->mapsz);
1382: return;
1383: }
1384:
1385: p->filepos++;
1386: f->map = malloc(f->mapsz);
1387: memcpy(f->map, map, f->mapsz);
1388: munmap(map, f->mapsz);
1389: parseeof(p);
1.1 kristaps 1390: texifilepop(p);
1391: }
1392:
1.2 kristaps 1393: /*
1394: * Look up the value to a stored pair's value starting in "buf" from
1395: * start to end.
1396: * Return the pointer to the value memory, which can be NULL if the
1397: * pointer key does not exist.
1398: * The pointer can point to NULL if the value has been unset.
1399: */
1400: static char **
1.14 kristaps 1401: valuequery(const struct texi *p, size_t start, size_t end)
1.2 kristaps 1402: {
1403: size_t i, sz, len;
1404:
1405: assert(end >= start);
1406: /* Ignore zero-length. */
1407: if (0 == (len = (end - start)))
1408: return(NULL);
1409: for (i = 0; i < p->valsz; i++) {
1410: sz = strlen(p->vals[i].key);
1411: if (sz != len)
1412: continue;
1.14 kristaps 1413: if (0 == strncmp(p->vals[i].key, &BUF(p)[start], len))
1.2 kristaps 1414: return(&p->vals[i].value);
1415: }
1416: return(NULL);
1417: }
1418:
1419: /*
1420: * Parse a key until the end of line, e.g., @clear foo\n, and return the
1421: * pointer to its value via valuequery().
1422: */
1423: static char **
1.14 kristaps 1424: valuelquery(struct texi *p, size_t *pos)
1.2 kristaps 1425: {
1426: size_t start, end;
1427: char **ret;
1428:
1.14 kristaps 1429: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1430: advance(p, pos);
1431: if (*pos == BUFSZ(p))
1.2 kristaps 1432: return(NULL);
1.14 kristaps 1433: for (start = end = *pos; end < BUFSZ(p); end++)
1434: if ('\n' == BUF(p)[end])
1.2 kristaps 1435: break;
1.14 kristaps 1436: advanceto(p, pos, end);
1437: if (*pos < BUFSZ(p)) {
1438: assert('\n' == BUF(p)[*pos]);
1439: advance(p, pos);
1.2 kristaps 1440: }
1.14 kristaps 1441: if (NULL == (ret = valuequery(p, start, end)))
1.2 kristaps 1442: return(NULL);
1443: return(ret);
1444: }
1445:
/*
 * Unset the value whose key runs to the end of the current line.
 * The stored value is freed and its slot set to NULL; the key entry
 * itself is kept.
 * Does nothing if the key is not found.
 */
void
valuelclear(struct texi *p, size_t *pos)
{
	char	**slot;

	slot = valuelquery(p, pos);
	if (slot != NULL) {
		free(*slot);
		*slot = NULL;
	}
}
1456:
/*
 * Return the value stored for the key that runs to the end of the
 * current line.
 * The result is NULL if the key is not found or its value was unset.
 */
const char *
valuellookup(struct texi *p, size_t *pos)
{
	char	**slot;

	slot = valuelquery(p, pos);
	return slot != NULL ? *slot : NULL;
}
1466:
1467: /*
1468: * Parse a key from a bracketed string, e.g., @value{foo}, and return
1469: * the pointer to its value.
1470: * If the returned pointer is NULL, either there was no string within
1471: * the brackets (or no brackets), or the value was not found, or the
1472: * value had previously been unset.
1473: */
1474: const char *
1.14 kristaps 1475: valueblookup(struct texi *p, size_t *pos)
1.2 kristaps 1476: {
1477: size_t start, end;
1478: char **ret;
1479:
1.14 kristaps 1480: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1481: advance(p, pos);
1482: if (*pos == BUFSZ(p) || '{' != BUF(p)[*pos])
1.2 kristaps 1483: return(NULL);
1.14 kristaps 1484: advance(p, pos);
1485: for (start = end = *pos; end < BUFSZ(p); end++)
1486: if ('}' == BUF(p)[end])
1.2 kristaps 1487: break;
1.14 kristaps 1488: advanceto(p, pos, end);
1489: if (*pos < BUFSZ(p)) {
1490: assert('}' == BUF(p)[*pos]);
1491: advance(p, pos);
1.2 kristaps 1492: }
1.14 kristaps 1493: if (NULL == (ret = valuequery(p, start, end)))
1.2 kristaps 1494: return(NULL);
1495: return(*ret);
1496: }
1497:
1498: void
1499: valueadd(struct texi *p, char *key, char *val)
1500: {
1501: size_t i;
1502:
1503: assert(NULL != key);
1504: assert(NULL != val);
1505:
1506: for (i = 0; i < p->valsz; i++)
1507: if (0 == strcmp(p->vals[i].key, key))
1508: break;
1509:
1510: if (i < p->valsz) {
1511: free(key);
1512: free(p->vals[i].value);
1513: p->vals[i].value = val;
1514: } else {
1.4 kristaps 1515: /* FIXME: reallocarray() */
1.2 kristaps 1516: p->vals = realloc(p->vals,
1517: (p->valsz + 1) *
1518: sizeof(struct texivalue));
1.4 kristaps 1519: if (NULL == p->vals)
1520: texiabort(p, NULL);
1.2 kristaps 1521: p->vals[p->valsz].key = key;
1522: p->vals[p->valsz].value = val;
1523: p->valsz++;
1524: }
1.7 kristaps 1525: }
1526:
1527: /*
1528: * Take the arguments to a macro, e.g., @foo{bar, baz, xyzzy} (or the
1529: * declaration form, @macro foo {arg1, ...}) and textually convert it to
1530: * an array of arguments of size "argsz".
1531: * These need to be freed individually and as a whole.
1532: * NOTE: this will puke on @, or @} macros, which can trick it into
1533: * stopping argument parsing earlier.
1534: * Ergo, textual: this doesn't interpret the arguments in any way.
1535: */
1536: char **
1.14 kristaps 1537: argparse(struct texi *p, size_t *pos, size_t *argsz, size_t hint)
1.7 kristaps 1538: {
1539: char **args;
1540: size_t start, end, stack;
1541:
1.14 kristaps 1542: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1543: advance(p, pos);
1.7 kristaps 1544:
1545: args = NULL;
1546: *argsz = 0;
1547:
1.17 kristaps 1548: if (*pos == BUFSZ(p))
1549: return(args);
1550:
1.14 kristaps 1551: if ('{' != BUF(p)[*pos] && hint) {
1.10 kristaps 1552: /*
1553: * Special case: if we encounter an unbracketed argument
1554: * and we're being invoked with non-zero arguments
1555: * (versus being set, i.e., hint>0), then parse until
1556: * the end of line.
1557: */
1558: *argsz = 1;
1559: args = calloc(1, sizeof(char *));
1560: if (NULL == args)
1561: texiabort(p, NULL);
1562: start = *pos;
1.14 kristaps 1563: while (*pos < BUFSZ(p)) {
1564: if ('\n' == BUF(p)[*pos])
1.10 kristaps 1565: break;
1.14 kristaps 1566: advance(p, pos);
1.10 kristaps 1567: }
1568: args[0] = malloc(*pos - start + 1);
1.14 kristaps 1569: memcpy(args[0], &BUF(p)[start], *pos - start);
1.10 kristaps 1570: args[0][*pos - start] = '\0';
1.14 kristaps 1571: if (*pos < BUFSZ(p) && '\n' == BUF(p)[*pos])
1572: advance(p, pos);
1.10 kristaps 1573: return(args);
1.14 kristaps 1574: } else if ('{' != BUF(p)[*pos])
1.7 kristaps 1575: return(args);
1.17 kristaps 1576:
1577: assert('{' == BUF(p)[*pos]);
1.7 kristaps 1578:
1579: /* Parse til the closing '}', putting into the array. */
1.14 kristaps 1580: advance(p, pos);
1581: while (*pos < BUFSZ(p)) {
1582: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1583: advance(p, pos);
1.7 kristaps 1584: start = *pos;
1585: stack = 0;
1.14 kristaps 1586: while (*pos < BUFSZ(p)) {
1.7 kristaps 1587: /*
1588: * According to the manual, commas within
1589: * embedded commands are escaped.
1590: * We keep track of embedded-ness in the "stack"
1591: * state anyway, so this is free.
1592: */
1.14 kristaps 1593: if (',' == BUF(p)[*pos] && 0 == stack && 1 != hint)
1.7 kristaps 1594: break;
1.14 kristaps 1595: else if (0 == stack && '}' == BUF(p)[*pos])
1.7 kristaps 1596: break;
1.14 kristaps 1597: else if (0 != stack && '}' == BUF(p)[*pos])
1.7 kristaps 1598: stack--;
1.14 kristaps 1599: else if ('{' == BUF(p)[*pos])
1.7 kristaps 1600: stack++;
1.14 kristaps 1601: advance(p, pos);
1.7 kristaps 1602: }
1603: if (stack)
1604: texiwarn(p, "unterminated macro "
1605: "in macro arguments");
1.14 kristaps 1606: if ((end = *pos) == BUFSZ(p))
1.7 kristaps 1607: break;
1608: /* Test for zero-length '{ }'. */
1.14 kristaps 1609: if (start == end && '}' == BUF(p)[*pos] && 0 == *argsz)
1.7 kristaps 1610: break;
1611: /* FIXME: use reallocarray. */
1612: args = realloc
1613: (args, sizeof(char *) *
1614: (*argsz + 1));
1615: if (NULL == args)
1616: texiabort(p, NULL);
1617: args[*argsz] = malloc(end - start + 1);
1618: if (NULL == args[*argsz])
1619: texiabort(p, NULL);
1620: memcpy(args[*argsz],
1.14 kristaps 1621: &BUF(p)[start], end - start);
1.7 kristaps 1622: args[*argsz][end - start] = '\0';
1623: (*argsz)++;
1.14 kristaps 1624: if ('}' == BUF(p)[*pos])
1.7 kristaps 1625: break;
1.14 kristaps 1626: advance(p, pos);
1.7 kristaps 1627: }
1628:
1.14 kristaps 1629: if (*pos == BUFSZ(p))
1.7 kristaps 1630: texierr(p, "unterminated arguments");
1.14 kristaps 1631: assert('}' == BUF(p)[*pos]);
1632: advance(p, pos);
1.7 kristaps 1633: return(args);
1.2 kristaps 1634: }
1.20 kristaps 1635:
1636: /*
1637: * If we're printing chapters, then do some naviation here and then
1638: * close our outfile.
1639: * I want to call this the SEE ALSO section, but that's not really what
1640: * it is: we'll refer to the "initial" (top) node and the next and
1641: * previous chapters.
1642: */
1643: void
1644: teximdocclose(struct texi *p, int last)
1645: {
1646: char buf[PATH_MAX];
1647:
1.32 ! kristaps 1648: if (NULL == p->chapters || 1 == p->nodesz)
1.20 kristaps 1649: return;
1650:
1651: teximacro(p, "Sh INFO NAVIGATION");
1652:
1653: /* Print a reference to the "top" node. */
1.32 ! kristaps 1654: if (-1 != p->nodecache[p->nodecur].up) {
1.22 kristaps 1655: texiputchars(p, "Top node,");
1.32 ! kristaps 1656: snprintf(buf, sizeof(buf), "%s-%zd 7",
! 1657: p->chapters, p->nodecache[p->nodecur].up);
1.31 kristaps 1658: p->seenvs = 0;
1.20 kristaps 1659: teximacroopen(p, "Xr ");
1660: texiputchars(p, buf);
1.22 kristaps 1661: texiputchars(p, " ;");
1.20 kristaps 1662: teximacroclose(p);
1663: }
1664:
1.32 ! kristaps 1665: if (-1 != p->nodecache[p->nodecur].prev) {
1.22 kristaps 1666: texiputchars(p, "previous node,");
1.32 ! kristaps 1667: snprintf(buf, sizeof(buf), "%s-%zd 7",
! 1668: p->chapters, p->nodecache[p->nodecur].prev);
1.31 kristaps 1669: p->seenvs = 0;
1.20 kristaps 1670: teximacroopen(p, "Xr ");
1671: texiputchars(p, buf);
1672: if ( ! last)
1.22 kristaps 1673: texiputchars(p, " ;");
1.20 kristaps 1674: teximacroclose(p);
1675: }
1676:
1.32 ! kristaps 1677: if (-1 != p->nodecache[p->nodecur].next) {
! 1678: texiputchars(p, "next node,");
! 1679: snprintf(buf, sizeof(buf), "%s-%zd 7",
! 1680: p->chapters, p->nodecache[p->nodecur].next);
1.31 kristaps 1681: p->seenvs = 0;
1.20 kristaps 1682: teximacroopen(p, "Xr ");
1683: texiputchars(p, buf);
1684: teximacroclose(p);
1685: }
1686:
1687: fclose(p->outfile);
1.32 ! kristaps 1688: p->outfile = NULL;
! 1689: }
! 1690:
! 1691: ssize_t
! 1692: texicache(struct texi *p, const char *buf, size_t sz)
! 1693: {
! 1694: size_t i;
! 1695:
! 1696: for (i = 0; i < p->nodecachesz; i++) {
! 1697: if (sz != strlen(p->nodecache[i].name))
! 1698: continue;
! 1699: if (strncmp(buf, p->nodecache[i].name, sz))
! 1700: continue;
! 1701: break;
! 1702: }
! 1703: if (i < p->nodecachesz)
! 1704: return(i);
! 1705: if (NULL == buf)
! 1706: return(-1);
! 1707: p->nodecache = realloc
! 1708: (p->nodecache,
! 1709: (p->nodecachesz + 1) * sizeof(struct texinode));
! 1710: if (NULL == p->nodecache)
! 1711: texiabort(p, NULL);
! 1712: p->nodecache[p->nodecachesz].name = malloc(sz + 1);
! 1713: if (NULL == p->nodecache[p->nodecachesz].name)
! 1714: texiabort(p, NULL);
! 1715: memcpy(p->nodecache[p->nodecachesz].name, buf, sz);
! 1716: p->nodecache[p->nodecachesz].name[sz] = '\0';
! 1717: p->nodecache[p->nodecachesz].up =
! 1718: p->nodecache[p->nodecachesz].next =
! 1719: p->nodecache[p->nodecachesz].prev = -1;
! 1720: p->nodecachesz++;
! 1721: return(p->nodecachesz - 1);
1.20 kristaps 1722: }
1723:
1724: /*
1.32 ! kristaps 1725: * Here we print our standard mdoc(7) prologue.
! 1726: * We use the title set with @settitle for the `Nd' description
! 1727: * and the source document filename (the first one as invoked on
! 1728: * the command line) for the title.
! 1729: * The date is set to the current date.
1.20 kristaps 1730: */
1731: void
1.21 kristaps 1732: teximdocopen(struct texi *p, size_t *pos)
1.20 kristaps 1733: {
1734: const char *cp;
1735: time_t t;
1736: char date[32];
1737:
1738: t = time(NULL);
1739: strftime(date, sizeof(date), "%F", localtime(&t));
1740:
1.30 kristaps 1741: p->seenvs = -1;
1.20 kristaps 1742: teximacroopen(p, "Dd");
1743: texiputchars(p, date);
1744: teximacroclose(p);
1745: teximacroopen(p, "Dt");
1746: for (cp = p->title; '\0' != *cp; cp++)
1747: texiputchar(p, toupper((unsigned int)*cp));
1748: texiputchars(p, " 7");
1749: teximacroclose(p);
1750: teximacro(p, "Os");
1751: teximacro(p, "Sh NAME");
1752: teximacroopen(p, "Nm");
1753: for (cp = p->title; '\0' != *cp; cp++)
1754: texiputchar(p, *cp);
1755: teximacroclose(p);
1756: teximacroopen(p, "Nd");
1.21 kristaps 1757: /*
1758: * The subtitle `Nd' can consist of arbitrary macros, so paste
1759: * it and parse to the end of the line.
1760: */
1761: if (NULL != p->subtitle) {
1762: texisplice(p, p->subtitle, strlen(p->subtitle), *pos);
1763: parseeoln(p, pos);
1764: } else
1.20 kristaps 1765: texiputchars(p, "Unknown description");
1766: teximacroclose(p);
1767: }
1768:
CVSweb