Annotation of texi2mdoc/util.c, Revision 1.33
1.33 ! kristaps 1: /* $Id: util.c,v 1.32 2015/03/12 10:44:34 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2015 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/mman.h>
18: #include <sys/stat.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <limits.h>
24: #include <stdarg.h>
25: #include <stdio.h>
26: #include <stdlib.h>
27: #include <string.h>
28: #include <time.h>
29: #include <unistd.h>
30:
31: #include "extern.h"
32:
/*
 * Names of mdoc(7) macros, terminated by a NULL sentinel.
 * Every entry MUST be exactly two or three characters long.
 */
static const char *const mdocs[] = {
	"Ap",  "Dd",  "Dt",  "Os",  "Sh",  "Ss",  "Pp",  "D1",
	"Dl",  "Bd",  "Ed",  "Bl",  "El",  "It",  "Ad",  "An",
	"Ar",  "Cd",  "Cm",  "Dv",  "Er",  "Ev",  "Ex",  "Fa",
	"Fd",  "Fl",  "Fn",  "Ft",  "Ic",  "In",  "Li",  "Nd",
	"Nm",  "Op",  "Ot",  "Pa",  "Rv",  "St",  "Va",  "Vt",
	"Xr",  "%A",  "%B",  "%D",  "%I",  "%J",  "%N",  "%O",
	"%P",  "%R",  "%T",  "%V",  "Ac",  "Ao",  "Aq",  "At",
	"Bc",  "Bf",  "Bo",  "Bq",  "Bsx", "Bx",  "Db",  "Dc",
	"Do",  "Dq",  "Ec",  "Ef",  "Em",  "Eo",  "Fx",  "Ms",
	"No",  "Ns",  "Nx",  "Ox",  "Pc",  "Pf",  "Po",  "Pq",
	"Qc",  "Ql",  "Qo",  "Qq",  "Re",  "Rs",  "Sc",  "So",
	"Sq",  "Sm",  "Sx",  "Sy",  "Tn",  "Ux",  "Xc",  "Xo",
	"Fo",  "Fc",  "Oo",  "Oc",  "Bk",  "Ek",  "Bt",  "Hf",
	"Fr",  "Ud",  "Lb",  "Lp",  "Lk",  "Mt",  "Brq", "Bro",
	"Brc", "%C",  "Es",  "En",  "Dx",  "%Q",  "br",  "sp",
	"%U",  "Ta",  "ll",  NULL,
};
70:
71: /*
1.1 kristaps 72: * Unmap the top-most file in the stack of files currently opened (that
73: * is, nested calls to parsefile()).
74: */
75: void
76: texifilepop(struct texi *p)
77: {
78: struct texifile *f;
79:
80: assert(p->filepos > 0);
81: f = &p->files[--p->filepos];
1.14 kristaps 82: free(f->map);
1.1 kristaps 83: }
84:
1.7 kristaps 85: static void
86: teximacrofree(struct teximacro *p)
87: {
88: size_t i;
89:
90: for (i = 0; i < p->argsz; i++)
91: free(p->args[i]);
92:
93: free(p->args);
94: free(p->key);
95: free(p->value);
96: }
97:
98: static void
99: texivaluefree(struct texivalue *p)
100: {
101:
102: free(p->key);
103: free(p->value);
104: }
105:
1.30 kristaps 106: static void
107: texidex_free(struct texidex *p)
108: {
109: size_t i;
110:
111: for (i = 0; i < p->indexsz; i++)
1.31 kristaps 112: free(p->index[i].term);
1.30 kristaps 113:
114: free(p->index);
115: free(p->name);
116: p->index = NULL;
117: p->indexsz = 0;
118: }
119:
120: /*
121: * Add the text beginning at "index" and of "sz" bytes to the index
122: * named "tok" with name size "toksz".
123: * This will also output the necessary mdoc(7) to construct the index.
124: */
125: void
126: texindex(struct texi *p, const char *tok,
127: size_t toksz, const char *index, size_t sz)
128: {
1.31 kristaps 129: size_t i, isz;
1.30 kristaps 130: #ifdef HAVE_INDEX
131: char *cp;
132: #endif
133:
134: if (0 == sz) {
135: texiwarn(p, "zero-length index entry");
136: return;
137: }
138:
139: /* Look for the index. (Must be found.) */
140: for (i = 0; i < p->indexsz; i++) {
141: if (strlen(p->indexs[i].name) != toksz)
142: continue;
143: if (strncmp(p->indexs[i].name, tok, toksz))
144: continue;
145: break;
146: }
147:
148: assert(i < p->indexsz);
1.31 kristaps 149: isz = p->indexs[i].indexsz;
1.30 kristaps 150: /* Reallocate index's terms. */
151: p->indexs[i].index = realloc
152: (p->indexs[i].index,
1.31 kristaps 153: (isz + 1) * sizeof(struct texiterm));
1.30 kristaps 154: if (NULL == p->indexs[i].index)
155: texiabort(p, NULL);
156:
157: /* Add term to term array. */
1.32 kristaps 158: p->indexs[i].index[isz].chapter = p->nodecur;
1.31 kristaps 159: p->indexs[i].index[isz].term = malloc(sz + 1);
160: if (NULL == p->indexs[i].index[isz].term)
1.30 kristaps 161: texiabort(p, NULL);
1.31 kristaps 162: memcpy(p->indexs[i].index[isz].term, index, sz);
163: p->indexs[i].index[isz].term[sz] = '\0';
1.30 kristaps 164:
165: /* Output mdoc(7) for index. */
166: #ifdef HAVE_INDEX
167: p->seenvs = -1;
168: teximacroopen(p, "Ix");
169: texiputchars(p, "idx");
170: texiputchars(p, p->indexs[i].name);
1.31 kristaps 171: cp = p->indexs[i].index[isz].term;
1.32 kristaps 172: while ('\n' != *cp)
1.30 kristaps 173: texiputchar(p, *cp++);
174: teximacroclose(p);
175: #endif
176: p->indexs[i].indexsz++;
177: }
178:
179: /*
180: * Add an index entry named "tok" of length "sz".
181: * This usually consists of two letters, e.g., "cp" or "vr".
182: * This does nothing if the index exists or is zero-sized.
183: */
184: void
185: texindex_add(struct texi *p, const char *tok, size_t sz)
186: {
187: size_t i;
188: char *cp;
189:
190: if (0 == sz)
191: return;
192:
193: /* Make sure we don't have a duplicate. */
194: for (i = 0; i < p->indexsz; i++) {
195: if (strlen(p->indexs[i].name) != sz)
196: continue;
197: if (strncmp(p->indexs[i].name, tok, sz))
198: continue;
199: return;
200: }
201:
202: /* Reallocate indices. */
203: p->indexs = realloc(p->indexs,
1.31 kristaps 204: sizeof(struct texidex) *
205: (p->indexsz + 1));
1.30 kristaps 206: if (NULL == p->indexs)
207: texiabort(p, NULL);
208: if (NULL == (cp = malloc(sz + 1)))
209: texiabort(p, NULL);
210: memcpy(cp, tok, sz);
211: cp[sz] = '\0';
212: p->indexs[p->indexsz].name = cp;
213: p->indexs[p->indexsz].index = NULL;
214: p->indexs[p->indexsz].indexsz = 0;
215: p->indexsz++;
216: }
217:
1.1 kristaps 218: /*
219: * Unmap all files that we're currently using and free all resources
220: * that we've allocated during the parse.
221: * The utility should exit(...) after this is called.
222: */
223: void
224: texiexit(struct texi *p)
225: {
226: size_t i;
227:
228: /* Make sure we're newline-terminated. */
229: if (p->outcol)
1.20 kristaps 230: fputc('\n', p->outfile);
231: if (NULL != p->chapters)
232: teximdocclose(p, 1);
1.1 kristaps 233:
234: /* Unmap all files. */
235: while (p->filepos > 0)
236: texifilepop(p);
237:
1.7 kristaps 238: for (i = 0; i < p->macrosz; i++)
239: teximacrofree(&p->macros[i]);
1.1 kristaps 240: for (i = 0; i < p->dirsz; i++)
241: free(p->dirs[i]);
1.4 kristaps 242: for (i = 0; i < p->indexsz; i++)
1.30 kristaps 243: texidex_free(&p->indexs[i]);
1.7 kristaps 244: for (i = 0; i < p->valsz; i++)
245: texivaluefree(&p->vals[i]);
1.4 kristaps 246:
1.32 kristaps 247: free(p->nodecache);
1.7 kristaps 248: free(p->macros);
1.1 kristaps 249: free(p->vals);
1.4 kristaps 250: free(p->indexs);
1.1 kristaps 251: free(p->dirs);
252: free(p->subtitle);
253: free(p->title);
1.26 kristaps 254: free(p->copying);
1.1 kristaps 255: }
256:
/*
 * Fatal error path: report "errstring" through perror(3), release all
 * parse state with texiexit(), then exit with EXIT_FAILURE.
 * This function does not return.
 */
void
texiabort(struct texi *p, const char *errstring)
{

	perror(errstring);

	texiexit(p);
	exit(EXIT_FAILURE);
}
269:
270: /*
271: * Print a generic warning message (to stderr) tied to our current
272: * location in the parse sequence.
273: */
274: void
275: texiwarn(const struct texi *p, const char *fmt, ...)
276: {
1.15 kristaps 277: va_list ap;
278: const struct texifile *f;
279:
280: f = &p->files[p->filepos - 1];
281:
282: if (f->insplice)
283: fprintf(stderr, "%s:%zu:%zu (%zuB left in splice): "
284: "warning: ", f->name, f->line + 1,
285: f->col + 1, f->insplice);
286: else
287: fprintf(stderr, "%s:%zu:%zu: warning: ",
288: f->name, f->line + 1, f->col + 1);
1.1 kristaps 289:
290: va_start(ap, fmt);
291: vfprintf(stderr, fmt, ap);
292: va_end(ap);
293: fputc('\n', stderr);
294: }
295:
296: /*
297: * Print an error message (to stderr) tied to our current location in
298: * the parse sequence, invoke texiexit(), then die.
299: */
300: void
301: texierr(struct texi *p, const char *fmt, ...)
302: {
1.15 kristaps 303: va_list ap;
304: struct texifile *f;
305:
306: f = &p->files[p->filepos - 1];
307:
308: if (f->insplice)
309: fprintf(stderr, "%s:%zu:%zu: (%zuB left in splice): "
310: "error: ", f->name, f->line + 1,
311: f->col + 1, f->insplice);
312: else
313: fprintf(stderr, "%s:%zu:%zu: error: ",
314: f->name, f->line + 1, f->col + 1);
1.1 kristaps 315:
316: va_start(ap, fmt);
317: vfprintf(stderr, fmt, ap);
318: va_end(ap);
319: fputc('\n', stderr);
320: texiexit(p);
321: exit(EXIT_FAILURE);
322: }
323:
324: /*
325: * Put a single data character to the output if we're not ignoring.
1.13 kristaps 326: * Escape starting a line with a control character and slashes.
1.1 kristaps 327: */
328: void
329: texiputchar(struct texi *p, char c)
330: {
331:
332: if (p->ign)
333: return;
334: if ('.' == c && 0 == p->outcol)
1.20 kristaps 335: fputs("\\&", p->outfile);
1.10 kristaps 336: if ('\'' == c && 0 == p->outcol)
1.20 kristaps 337: fputs("\\&", p->outfile);
1.1 kristaps 338:
1.23 kristaps 339: if (p->uppercase)
340: fputc(toupper((unsigned int)c), p->outfile);
341: else
342: fputc(c, p->outfile);
1.13 kristaps 343: if ('\\' == c)
1.20 kristaps 344: fputc('e', p->outfile);
1.1 kristaps 345: if ('\n' == c) {
346: p->outcol = 0;
347: p->seenws = 0;
348: } else
349: p->outcol++;
350: }
351:
352: /*
1.13 kristaps 353: * Put an opaque series of characters.
354: * Characters starting a line with a control character are escaped, but
355: * that's it, so don't use this for non-controlled sequences of text.
1.1 kristaps 356: */
357: void
358: texiputchars(struct texi *p, const char *s)
359: {
360:
1.13 kristaps 361: if (p->ign)
362: return;
363: if ('.' == *s && 0 == p->outcol)
1.20 kristaps 364: fputs("\\&", p->outfile);
1.13 kristaps 365: if ('\'' == *s && 0 == p->outcol)
1.20 kristaps 366: fputs("\\&", p->outfile);
1.23 kristaps 367: if (p->uppercase)
368: for ( ; '\0' != *s; s++)
369: p->outcol += fputc(toupper
370: ((unsigned int)*s), p->outfile);
371: else
372: p->outcol += fputs(s, p->outfile);
1.9 kristaps 373: }
374:
375: /*
376: * This puts all characters onto the output stream but makes sure to
377: * escape mdoc(7) slashes.
1.14 kristaps 378: * FIXME: useless.
1.9 kristaps 379: */
380: void
1.14 kristaps 381: texiputbuf(struct texi *p, size_t start, size_t end)
1.9 kristaps 382: {
383:
1.14 kristaps 384: for ( ; start < end; start++)
385: texiputchar(p, BUF(p)[start]);
1.1 kristaps 386: }
387:
388: /*
389: * Close an mdoc(7) macro opened with teximacroopen().
390: * If there are no more macros on the line, prints a newline.
391: */
392: void
393: teximacroclose(struct texi *p)
394: {
395:
1.30 kristaps 396: if (p->ign || p->literal|| TEXILIST_TABLE == p->list)
1.1 kristaps 397: return;
398:
399: if (0 == --p->outmacro) {
1.20 kristaps 400: fputc('\n', p->outfile);
1.1 kristaps 401: p->outcol = p->seenws = 0;
402: }
403: }
404:
405: /*
406: * Open a mdoc(7) macro.
407: * This is used for line macros, e.g., Qq [foo bar baz].
408: * It can be invoked for nested macros, e.g., Qq Li foo .
409: * TODO: flush-right punctuation (e.g., parenthesis).
410: */
411: void
412: teximacroopen(struct texi *p, const char *s)
413: {
414: int rc;
415:
1.30 kristaps 416: if (p->ign || p->literal|| TEXILIST_TABLE == p->list)
1.1 kristaps 417: return;
418:
419: if (p->outcol && 0 == p->outmacro) {
1.20 kristaps 420: fputc('\n', p->outfile);
1.1 kristaps 421: p->outcol = 0;
422: }
423:
1.30 kristaps 424: if (p->seenvs > 0 && 0 == p->outmacro)
425: fputs(".Pp\n", p->outfile);
426:
1.1 kristaps 427: if (0 == p->outmacro)
1.20 kristaps 428: fputc('.', p->outfile);
1.1 kristaps 429: else
1.20 kristaps 430: fputc(' ', p->outfile);
1.1 kristaps 431:
1.20 kristaps 432: if (EOF != (rc = fputs(s, p->outfile)))
1.1 kristaps 433: p->outcol += rc;
434:
1.20 kristaps 435: fputc(' ', p->outfile);
1.1 kristaps 436: p->outcol++;
437: p->outmacro++;
1.30 kristaps 438: p->seenws = p->seenvs = 0;
1.1 kristaps 439: }
440:
441: /*
442: * Put a stadnalone mdoc(7) command with the trailing newline.
443: */
444: void
445: teximacro(struct texi *p, const char *s)
446: {
447:
448: if (p->ign)
449: return;
450:
451: if (p->outmacro)
452: texierr(p, "\"%s\" in open line scope!?", s);
453: if (p->literal)
454: texierr(p, "\"%s\" in a literal scope!?", s);
455: if (p->outcol)
1.20 kristaps 456: fputc('\n', p->outfile);
1.30 kristaps 457: if (p->seenvs > 0)
458: fputs(".Pp\n", p->outfile);
1.1 kristaps 459:
1.20 kristaps 460: fputc('.', p->outfile);
461: fputs(s, p->outfile);
462: fputc('\n', p->outfile);
1.1 kristaps 463: p->outcol = p->seenws = 0;
464: }
465:
466: /*
467: * Introduce vertical space during normal (non-macro) input.
468: */
469: void
470: texivspace(struct texi *p)
471: {
472:
1.30 kristaps 473: if (TEXILIST_TABLE != p->list && p->seenvs >= 0)
474: p->seenvs = 1;
1.1 kristaps 475: }
476:
477: /*
478: * Advance by a single byte in the input stream, adjusting our location
479: * in the current input file.
480: */
481: void
1.14 kristaps 482: advance(struct texi *p, size_t *pos)
1.1 kristaps 483: {
1.15 kristaps 484: struct texifile *f;
1.1 kristaps 485:
1.15 kristaps 486: f = &p->files[p->filepos - 1];
487:
488: if (0 == f->insplice) {
489: if ('\n' == BUF(p)[*pos]) {
490: f->line++;
491: f->col = 0;
492: } else
493: f->col++;
1.17 kristaps 494: } else {
1.15 kristaps 495: --f->insplice;
1.17 kristaps 496: if (0 == f->insplice)
497: f->depth = 0;
498: }
1.1 kristaps 499:
500: (*pos)++;
501: }
502:
503: /*
504: * It's common to wait punctuation to float on the right side of macro
505: * lines in mdoc(7), e.g., ".Em hello ) ."
506: * This function does so, and should be called before teximacroclose().
507: * It will detect that it's the last in the nested macros and
508: * appropriately flush-left punctuation alongside the macro.
509: */
510: void
1.14 kristaps 511: texipunctuate(struct texi *p, size_t *pos)
1.1 kristaps 512: {
513: size_t start, end;
514:
515: if (1 != p->outmacro)
516: return;
517:
1.14 kristaps 518: for (start = end = *pos; end < BUFSZ(p); end++) {
519: switch (BUF(p)[end]) {
1.1 kristaps 520: case (','):
521: case (')'):
522: case ('.'):
523: case ('"'):
524: case (':'):
1.22 kristaps 525: case (';'):
1.1 kristaps 526: case ('!'):
527: case ('?'):
528: continue;
529: default:
530: break;
531: }
532: break;
533: }
534: if (end == *pos)
535: return;
1.14 kristaps 536: if (end + 1 == BUFSZ(p) || ' ' == BUF(p)[end] ||
1.30 kristaps 537: '@' == BUF(p)[end] || '\n' == BUF(p)[end]) {
1.1 kristaps 538: for ( ; start < end; start++) {
539: texiputchar(p, ' ');
1.14 kristaps 540: texiputchar(p, BUF(p)[start]);
541: advance(p, pos);
1.1 kristaps 542: }
543: }
544: }
545:
546: /*
547: * Advance to the next non-whitespace word in the input stream.
548: * If we're in literal mode, then print all of the whitespace as we're
549: * doing so.
550: */
551: static size_t
1.14 kristaps 552: advancenext(struct texi *p, size_t *pos)
1.1 kristaps 553: {
554:
555: if (p->literal) {
1.14 kristaps 556: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos])) {
557: texiputchar(p, BUF(p)[*pos]);
558: advance(p, pos);
1.1 kristaps 559: }
560: return(*pos);
561: }
562:
1.14 kristaps 563: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos])) {
1.1 kristaps 564: p->seenws = 1;
1.30 kristaps 565: if (0 == p->seenvs && '\n' == BUF(p)[*pos])
566: if (*pos + 1 < BUFSZ(p) && '\n' == BUF(p)[*pos + 1])
567: p->seenvs = 1;
1.14 kristaps 568: advance(p, pos);
1.1 kristaps 569: }
570: return(*pos);
571: }
572:
573: /*
574: * Advance to the EOLN in the input stream.
1.22 kristaps 575: * This will skip over '@' markers in an effort to ignore escaped
576: * newlines.
1.1 kristaps 577: */
578: size_t
1.14 kristaps 579: advanceeoln(struct texi *p, size_t *pos, int consumenl)
1.1 kristaps 580: {
581:
1.22 kristaps 582: while (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos]) {
583: if ('@' == BUF(p)[*pos])
584: advance(p, pos);
1.33 ! kristaps 585: if (*pos < BUFSZ(p))
! 586: advance(p, pos);
1.22 kristaps 587: }
1.14 kristaps 588: if (*pos < BUFSZ(p) && consumenl)
589: advance(p, pos);
1.1 kristaps 590: return(*pos);
591: }
592:
/*
 * Step through the input one byte at a time until reaching "end", an
 * absolute position in the current buffer that must be greater than or
 * equal to the current position.
 */
void
advanceto(struct texi *p, size_t *pos, size_t end)
{

	assert(*pos <= end);

	for ( ; *pos < end; )
		advance(p, pos);
}
605:
1.7 kristaps 606: static void
1.17 kristaps 607: texiexecmacro(struct texi *p, struct teximacro *m, size_t sv, size_t *pos)
1.7 kristaps 608: {
1.11 kristaps 609: size_t valsz, realsz, aasz, asz,
610: ssz, i, j, k, start, end;
611: char *val;
612: char **args;
613: const char *cp;
1.7 kristaps 614:
1.17 kristaps 615: /* Disregard empty macros. */
1.22 kristaps 616: if (0 == (valsz = realsz = strlen(m->value))) {
617: args = argparse(p, pos, &asz, m->argsz);
618: for (i = 0; i < asz; i++)
619: free(args[i]);
620: free(args);
1.17 kristaps 621: return;
1.22 kristaps 622: }
1.17 kristaps 623:
624: /*
625: * This is important: it protect us from macros that invoke more
626: * macros, possibly going on infinitely.
627: * We use "sv" instead of the current position because we might
628: * be invoked at the end of the macro (i.e., insplice == 0).
629: * The "sv" value was initialised at the start of the macro.
630: */
631: if (sv > 0)
1.24 kristaps 632: if (++p->files[p->filepos - 1].depth > 64)
1.17 kristaps 633: texierr(p, "maximium recursive depth");
634:
1.14 kristaps 635: args = argparse(p, pos, &asz, m->argsz);
1.7 kristaps 636: if (asz != m->argsz)
637: texiwarn(p, "invalid macro argument length");
638: aasz = asz < m->argsz ? asz : m->argsz;
639:
640: if (0 == aasz) {
1.21 kristaps 641: texisplice(p, m->value, valsz, *pos);
1.7 kristaps 642: return;
643: }
644:
645: val = strdup(m->value);
646:
647: for (i = j = 0; i < realsz; i++) {
648: /* Parse blindly til the backslash delimiter. */
649: if ('\\' != m->value[i]) {
650: val[j++] = m->value[i];
651: val[j] = '\0';
652: continue;
653: } else if (i == realsz - 1)
654: texierr(p, "trailing argument name delimiter");
655:
656: /* Double-backslash is escaped. */
657: if ('\\' == m->value[i + 1]) {
658: val[j++] = m->value[i++];
659: val[j] = '\0';
660: continue;
661: }
662:
663: assert('\\' == m->value[i] && i < realsz - 1);
664:
665: /* Parse to terminating delimiter. */
666: /* FIXME: embedded, escaped delimiters? */
667: for (start = end = i + 1; end < realsz; end++)
668: if ('\\' == m->value[end])
669: break;
670: if (end == realsz)
671: texierr(p, "unterminated argument name");
672:
673: for (k = 0; k < aasz; k++) {
674: if ((ssz = strlen(m->args[k])) != (end - start))
675: continue;
676: if (strncmp(&m->value[start], m->args[k], ssz))
677: continue;
678: break;
679: }
680:
681: /*
682: * Argument didn't exist in argument table.
1.14 kristaps 683: * Just ignore it.
1.7 kristaps 684: */
685: if (k == aasz) {
1.14 kristaps 686: i = end;
1.7 kristaps 687: continue;
688: }
689:
690: if (strlen(args[k]) > ssz) {
691: valsz += strlen(args[k]);
692: val = realloc(val, valsz + 1);
693: if (NULL == val)
694: texiabort(p, NULL);
695: }
696:
1.11 kristaps 697: for (cp = args[k]; '\0' != *cp; cp++)
698: val[j++] = *cp;
699:
700: val[j] = '\0';
1.7 kristaps 701: i = end;
702: }
703:
1.21 kristaps 704: texisplice(p, val, strlen(val), *pos);
1.7 kristaps 705:
706: for (i = 0; i < asz; i++)
707: free(args[i]);
708: free(args);
709: free(val);
710: }
711:
1.1 kristaps 712: /*
713: * Output a free-form word in the input stream, progressing to the next
714: * command or white-space.
715: * This also will advance the input stream.
716: */
717: static void
1.14 kristaps 718: parseword(struct texi *p, size_t *pos, char extra)
1.1 kristaps 719: {
1.29 kristaps 720: size_t i, end, len;
721: int c;
1.1 kristaps 722:
1.25 kristaps 723: /*
1.27 kristaps 724: * If a prior word had a terminating double-newline, then begin
725: * this text block with a `Pp'.
726: * We don't do this if we're in a literal context (we'll print
727: * out the newlines themselves) nor in a `TS' table.
728: */
1.30 kristaps 729: if (p->seenvs > 0 && 0 == p->literal && TEXILIST_TABLE != p->list) {
730: if (p->outcol > 0)
731: fputc('\n', p->outfile);
732: fputs(".Pp\n", p->outfile);
733: p->outcol = 0;
734: }
1.27 kristaps 735:
736: /*
1.25 kristaps 737: * Some line control: if we (non-macro, non-literal) already
738: * have more than 72 characters written to the screen, then
739: * output a newline before getting started.
740: */
1.1 kristaps 741: if (p->seenws && 0 == p->outmacro &&
742: p->outcol > 72 && 0 == p->literal)
743: texiputchar(p, '\n');
1.25 kristaps 744:
745: /* Usual padding in the case of seen whitespace. */
1.1 kristaps 746: if (p->seenws && p->outcol && 0 == p->literal)
747: texiputchar(p, ' ');
748:
749: p->seenws = 0;
1.29 kristaps 750:
751: /*
752: * If we're in a macro line, we might want to print text that
753: * happens to be the same as an mdoc(7) macro.
754: * Obviously, we need to escape these words.
755: */
756: if (p->outmacro) {
757: end = *pos;
758: /* Read ahead to get the word length. */
759: while (end < BUFSZ(p) && ! ismspace(BUF(p)[end])) {
760: switch ((c = BUF(p)[end])) {
761: case ('@'):
762: case ('}'):
763: case ('{'):
764: break;
765: default:
766: if ('\0' != extra && extra == c)
767: break;
768: end++;
769: continue;
770: }
771: break;
772: }
773: len = end - *pos;
774: /* See if we have a match. */
775: for (i = 0; NULL != mdocs[i]; i++) {
776: /* All macros are 2 or three letters. */
777: if (len < 2 || len > 3)
778: continue;
779: /* Check the macro word length. */
780: if ('\0' == mdocs[i][2] && 2 != len)
781: continue;
782: else if ('\0' == mdocs[i][3] && 3 != len)
783: continue;
784: if (strncmp(mdocs[i], &BUF(p)[*pos], len))
785: continue;
786: texiputchars(p, "\\&");
787: break;
788: }
789: }
1.1 kristaps 790:
1.14 kristaps 791: while (*pos < BUFSZ(p) && ! ismspace(BUF(p)[*pos])) {
792: switch (BUF(p)[*pos]) {
1.1 kristaps 793: case ('@'):
794: case ('}'):
795: case ('{'):
796: return;
797: }
1.14 kristaps 798: if ('\0' != extra && BUF(p)[*pos] == extra)
1.1 kristaps 799: return;
1.28 kristaps 800:
801: if (p->literal) {
802: texiputchar(p, BUF(p)[*pos]);
803: advance(p, pos);
804: continue;
805: }
806:
1.30 kristaps 807: if ('"' == BUF(p)[*pos]) {
808: texiputchars(p, "\\(dq");
809: } else if (*pos < BUFSZ(p) - 2 &&
1.28 kristaps 810: '-' == BUF(p)[*pos] &&
811: '-' == BUF(p)[*pos + 1] &&
812: '-' == BUF(p)[*pos + 2]) {
813: texiputchars(p, "\\(em");
814: advance(p, pos);
815: advance(p, pos);
816: } else if (*pos < BUFSZ(p) - 1 &&
817: '-' == BUF(p)[*pos] &&
818: '-' == BUF(p)[*pos + 1]) {
819: texiputchars(p, "\\(en");
820: advance(p, pos);
821: } else if (*pos < BUFSZ(p) - 1 &&
1.14 kristaps 822: '`' == BUF(p)[*pos] &&
823: '`' == BUF(p)[*pos + 1]) {
1.1 kristaps 824: texiputchars(p, "\\(lq");
1.14 kristaps 825: advance(p, pos);
826: } else if (*pos < BUFSZ(p) - 1 &&
827: '\'' == BUF(p)[*pos] &&
828: '\'' == BUF(p)[*pos + 1]) {
1.1 kristaps 829: texiputchars(p, "\\(rq");
1.14 kristaps 830: advance(p, pos);
1.1 kristaps 831: } else
1.14 kristaps 832: texiputchar(p, BUF(p)[*pos]);
1.28 kristaps 833:
1.14 kristaps 834: advance(p, pos);
1.1 kristaps 835: }
1.25 kristaps 836:
837: /*
838: * New sentence, new line:if we (non-macro, non-literal) see a
839: * period at the end of the last printed word, then open a
840: * newline.
841: */
1.30 kristaps 842: if (0 == p->literal && 0 == p->outmacro && *pos < BUFSZ(p))
843: switch (BUF(p)[*pos - 1]) {
844: case ('.'):
845: case ('!'):
846: case ('?'):
847: texiputchar(p, '\n');
848: break;
849: default:
850: break;
851: }
852:
853: p->seenvs = 0;
1.1 kristaps 854: }
855:
856: /*
857: * Look up the command at position "pos" in the buffer, returning it (or
858: * TEXICMD__MAX if none found) and setting "end" to be the absolute
859: * index after the command name.
860: */
861: enum texicmd
1.19 kristaps 862: texicmd(const struct texi *p, size_t pos, size_t *end, struct teximacro **macro)
1.1 kristaps 863: {
1.4 kristaps 864: size_t i, len, toksz;
1.1 kristaps 865:
1.14 kristaps 866: assert('@' == BUF(p)[pos]);
1.1 kristaps 867:
1.7 kristaps 868: if (NULL != macro)
869: *macro = NULL;
870:
1.14 kristaps 871: if ((*end = pos) == BUFSZ(p))
1.1 kristaps 872: return(TEXICMD__MAX);
1.14 kristaps 873: else if ((*end = ++pos) == BUFSZ(p))
1.1 kristaps 874: return(TEXICMD__MAX);
875:
876: /* Alphabetic commands are special. */
1.23 kristaps 877: if ( ! isalpha((unsigned int)BUF(p)[pos])) {
1.14 kristaps 878: if ((*end = pos + 1) == BUFSZ(p))
1.1 kristaps 879: return(TEXICMD__MAX);
880: for (i = 0; i < TEXICMD__MAX; i++) {
881: if (1 != texitoks[i].len)
882: continue;
1.14 kristaps 883: if (0 == strncmp(texitoks[i].tok, &BUF(p)[pos], 1))
1.1 kristaps 884: return(i);
885: }
1.14 kristaps 886: texiwarn(p, "bad command: @%c", BUF(p)[pos]);
1.1 kristaps 887: return(TEXICMD__MAX);
888: }
889:
1.4 kristaps 890: /* Scan to the end of the possible command name. */
1.14 kristaps 891: for (*end = pos; *end < BUFSZ(p) && ! ismspace(BUF(p)[*end]); (*end)++)
892: if ((*end > pos && ('@' == BUF(p)[*end] ||
893: '{' == BUF(p)[*end] || '}' == BUF(p)[*end])))
1.1 kristaps 894: break;
895:
1.4 kristaps 896: /* Look for the command. */
1.1 kristaps 897: len = *end - pos;
898: for (i = 0; i < TEXICMD__MAX; i++) {
899: if (len != texitoks[i].len)
900: continue;
1.14 kristaps 901: if (0 == strncmp(texitoks[i].tok, &BUF(p)[pos], len))
1.1 kristaps 902: return(i);
903: }
904:
1.4 kristaps 905: /* Look for it in our indices. */
906: for (i = 0; i < p->indexsz; i++) {
1.30 kristaps 907: toksz = strlen(p->indexs[i].name);
1.4 kristaps 908: if (len != 5 + toksz)
909: continue;
1.30 kristaps 910: if (strncmp(&BUF(p)[pos], p->indexs[i].name, toksz))
1.4 kristaps 911: continue;
1.14 kristaps 912: if (0 == strncmp(&BUF(p)[pos + toksz], "index", 5))
1.7 kristaps 913: return(TEXICMD_USER_INDEX);
914: }
915:
916: for (i = 0; i < p->macrosz; i++) {
917: if (len != strlen(p->macros[i].key))
918: continue;
1.14 kristaps 919: if (strncmp(&BUF(p)[pos], p->macros[i].key, len))
1.7 kristaps 920: continue;
921: if (NULL != macro)
922: *macro = &p->macros[i];
923: return(TEXICMD__MAX);
1.4 kristaps 924: }
925:
1.14 kristaps 926: texiwarn(p, "bad command: @%.*s", (int)len, &BUF(p)[pos]);
1.1 kristaps 927: return(TEXICMD__MAX);
928: }
929:
930: /*
931: * Parse an argument from a bracketed command, e.g., @url{foo, baz}.
932: * Num should be set to the argument we're currently parsing, although
933: * it suffixes for it to be zero or non-zero.
934: * This will return 1 if there are more arguments, 0 otherwise.
935: * This will stop (returning 0) in the event of EOF or if we're not at a
936: * bracket for the zeroth parse.
937: */
938: int
1.14 kristaps 939: parsearg(struct texi *p, size_t *pos, size_t num)
1.1 kristaps 940: {
1.17 kristaps 941: size_t end, sv;
1.7 kristaps 942: enum texicmd cmd;
943: struct teximacro *macro;
1.1 kristaps 944:
1.14 kristaps 945: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos]))
946: advance(p, pos);
947: if (*pos == BUFSZ(p) || (0 == num && '{' != BUF(p)[*pos]))
1.1 kristaps 948: return(0);
949: if (0 == num)
1.14 kristaps 950: advance(p, pos);
1.1 kristaps 951:
1.14 kristaps 952: while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
953: switch (BUF(p)[*pos]) {
1.1 kristaps 954: case (','):
1.14 kristaps 955: advance(p, pos);
1.1 kristaps 956: return(1);
957: case ('}'):
1.14 kristaps 958: advance(p, pos);
1.1 kristaps 959: return(0);
960: case ('{'):
961: if (0 == p->ign)
962: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 963: advance(p, pos);
1.1 kristaps 964: continue;
965: case ('@'):
966: break;
967: default:
1.14 kristaps 968: parseword(p, pos, ',');
1.1 kristaps 969: continue;
970: }
971:
1.17 kristaps 972: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 973: cmd = texicmd(p, *pos, &end, ¯o);
974: advanceto(p, pos, end);
1.7 kristaps 975: if (NULL != macro)
1.17 kristaps 976: texiexecmacro(p, macro, sv, pos);
1.1 kristaps 977: if (TEXICMD__MAX == cmd)
978: continue;
979: if (NULL != texitoks[cmd].fp)
1.14 kristaps 980: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 981: }
982: return(0);
983: }
984:
985: /*
986: * Parse until the end of a bracketed statement, e.g., @foo{bar baz}.
987: * This will stop in the event of EOF or if we're not at a bracket.
988: */
989: void
1.18 kristaps 990: parsebracket(struct texi *p, size_t *pos, int dostack)
1.1 kristaps 991: {
1.18 kristaps 992: size_t end, sv, stack;
1.7 kristaps 993: enum texicmd cmd;
994: struct teximacro *macro;
1.1 kristaps 995:
1.14 kristaps 996: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos]))
997: advance(p, pos);
1.1 kristaps 998:
1.14 kristaps 999: if (*pos == BUFSZ(p) || '{' != BUF(p)[*pos])
1.1 kristaps 1000: return;
1.14 kristaps 1001: advance(p, pos);
1.1 kristaps 1002:
1.18 kristaps 1003: stack = 0;
1.14 kristaps 1004: while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
1005: switch (BUF(p)[*pos]) {
1.1 kristaps 1006: case ('}'):
1.18 kristaps 1007: if (stack > 0) {
1008: stack--;
1009: advance(p, pos);
1010: texiputchar(p, '}');
1011: continue;
1012: }
1.14 kristaps 1013: advance(p, pos);
1.1 kristaps 1014: return;
1015: case ('{'):
1.18 kristaps 1016: if (dostack) {
1017: stack++;
1018: advance(p, pos);
1019: texiputchar(p, '{');
1020: continue;
1021: }
1.1 kristaps 1022: if (0 == p->ign)
1023: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1024: advance(p, pos);
1.1 kristaps 1025: continue;
1026: case ('@'):
1027: break;
1028: default:
1.14 kristaps 1029: parseword(p, pos, '\0');
1.1 kristaps 1030: continue;
1031: }
1032:
1.17 kristaps 1033: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1034: cmd = texicmd(p, *pos, &end, ¯o);
1035: advanceto(p, pos, end);
1.7 kristaps 1036: if (NULL != macro)
1.17 kristaps 1037: texiexecmacro(p, macro, sv, pos);
1.1 kristaps 1038: if (TEXICMD__MAX == cmd)
1039: continue;
1040: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1041: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1042: }
1043: }
1044:
1045: /*
1046: * This should be invoked when we're on a macro line and want to process
1047: * to the end of the current input line, doing all of our macros along
1048: * the way.
1049: */
1050: void
1.14 kristaps 1051: parseeoln(struct texi *p, size_t *pos)
1.1 kristaps 1052: {
1.17 kristaps 1053: size_t end, sv;
1.7 kristaps 1054: enum texicmd cmd;
1055: struct teximacro *macro;
1.1 kristaps 1056:
1.14 kristaps 1057: while (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos]) {
1058: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) {
1.1 kristaps 1059: p->seenws = 1;
1060: if (p->literal)
1.14 kristaps 1061: texiputchar(p, BUF(p)[*pos]);
1062: advance(p, pos);
1.33 ! kristaps 1063: }
! 1064: if (*pos == BUFSZ(p)) {
! 1065: texiwarn(p, "unexpected EOF");
! 1066: return;
1.1 kristaps 1067: }
1.14 kristaps 1068: switch (BUF(p)[*pos]) {
1.1 kristaps 1069: case ('}'):
1070: if (0 == p->ign)
1071: texiwarn(p, "unexpected \"}\"");
1.14 kristaps 1072: advance(p, pos);
1.1 kristaps 1073: continue;
1074: case ('{'):
1075: if (0 == p->ign)
1076: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1077: advance(p, pos);
1.1 kristaps 1078: continue;
1.30 kristaps 1079: case ('\n'):
1080: continue;
1.1 kristaps 1081: case ('@'):
1082: break;
1083: default:
1.14 kristaps 1084: parseword(p, pos, '\0');
1.1 kristaps 1085: continue;
1086: }
1087:
1.17 kristaps 1088: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1089: cmd = texicmd(p, *pos, &end, ¯o);
1090: advanceto(p, pos, end);
1.7 kristaps 1091: if (NULL != macro)
1.17 kristaps 1092: texiexecmacro(p, macro, sv, pos);
1.1 kristaps 1093: if (TEXICMD__MAX == cmd)
1094: continue;
1095: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1096: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1097: }
1.14 kristaps 1098:
1099: if (*pos < BUFSZ(p) && '\n' == BUF(p)[*pos])
1100: advance(p, pos);
1.19 kristaps 1101: }
1102:
1.30 kristaps 1103: enum texicmd
1104: peeklinecmd(const struct texi *p, size_t pos)
1105: {
1106: size_t end;
1107:
1108: while (pos < BUFSZ(p) && isws(BUF(p)[pos]))
1109: pos++;
1110: if (pos == BUFSZ(p) || '@' != BUF(p)[pos])
1111: return(TEXICMD__MAX);
1112: return(texicmd(p, pos, &end, NULL));
1113: }
1114:
1.19 kristaps 1115: /*
1116: * Peek to see if there's a command after subsequent whitespace.
1117: * If so, return the macro identifier.
1118: * This DOES NOT work with user-defined macros.
1119: */
1120: enum texicmd
1121: peekcmd(const struct texi *p, size_t pos)
1122: {
1123: size_t end;
1124:
1125: while (pos < BUFSZ(p) && ismspace(BUF(p)[pos]))
1126: pos++;
1127: if (pos == BUFSZ(p) || '@' != BUF(p)[pos])
1128: return(TEXICMD__MAX);
1129: return(texicmd(p, pos, &end, NULL));
1.1 kristaps 1130: }
1131:
1132: /*
1133: * Parse a single word or command.
1134: * This will return immediately at the EOF.
1135: */
1.32 kristaps 1136: void
1.14 kristaps 1137: parsesingle(struct texi *p, size_t *pos)
1.1 kristaps 1138: {
1.17 kristaps 1139: size_t end, sv;
1.7 kristaps 1140: enum texicmd cmd;
1141: struct teximacro *macro;
1.1 kristaps 1142:
1.14 kristaps 1143: if ((*pos = advancenext(p, pos)) >= BUFSZ(p))
1.1 kristaps 1144: return;
1145:
1.14 kristaps 1146: switch (BUF(p)[*pos]) {
1.1 kristaps 1147: case ('}'):
1148: if (0 == p->ign)
1149: texiwarn(p, "unexpected \"}\"");
1.14 kristaps 1150: advance(p, pos);
1.1 kristaps 1151: return;
1152: case ('{'):
1153: if (0 == p->ign)
1154: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1155: advance(p, pos);
1.1 kristaps 1156: return;
1157: case ('@'):
1158: break;
1159: default:
1.14 kristaps 1160: parseword(p, pos, '\0');
1.1 kristaps 1161: return;
1162: }
1163:
1.17 kristaps 1164: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1165: cmd = texicmd(p, *pos, &end, ¯o);
1166: advanceto(p, pos, end);
1.7 kristaps 1167: if (NULL != macro)
1.17 kristaps 1168: texiexecmacro(p, macro, sv, pos);
1.1 kristaps 1169: if (TEXICMD__MAX == cmd)
1170: return;
1171: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1172: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1173: }
1174:
1175: /*
1176: * This is used in the @deffn type of command.
1177: * These have an arbitrary number of line arguments; however, these
1178: * arguments may or may not be surrounded by brackets.
1179: * In this function, we parse each one as either a bracketed or
1180: * non-bracketed argument, returning 0 when we've reached the end of
1181: * line or 1 otherwise.
1182: */
1183: int
1.14 kristaps 1184: parselinearg(struct texi *p, size_t *pos)
1.1 kristaps 1185: {
1186:
1.14 kristaps 1187: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) {
1.1 kristaps 1188: p->seenws = 1;
1.14 kristaps 1189: advance(p, pos);
1.1 kristaps 1190: }
1191:
1.14 kristaps 1192: if (*pos < BUFSZ(p) && '{' == BUF(p)[*pos])
1.18 kristaps 1193: parsebracket(p, pos, 0);
1.14 kristaps 1194: else if (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos])
1195: parsesingle(p, pos);
1.1 kristaps 1196: else
1197: return(0);
1198:
1199: return(1);
1200: }
1201:
/*
 * Starting from offset zero, repeatedly parse single words/commands
 * until the position reaches the end of the buffer.
 */
static void
parseeof(struct texi *p)
{
	size_t	 cursor = 0;

	while (cursor < BUFSZ(p))
		parsesingle(p, &cursor);
}
1213:
1.8 kristaps 1214: void
1.21 kristaps 1215: texisplice(struct texi *p, const char *buf, size_t sz, size_t pos)
1.8 kristaps 1216: {
1.14 kristaps 1217: char *cp;
1218: struct texifile *f;
1.8 kristaps 1219:
1.14 kristaps 1220: assert(p->filepos > 0);
1221: f = &p->files[p->filepos - 1];
1.8 kristaps 1222:
1.14 kristaps 1223: if (f->mapsz + sz > f->mapmaxsz) {
1224: f->mapmaxsz = f->mapsz + sz + 1024;
1225: cp = realloc(f->map, f->mapmaxsz);
1226: if (NULL == cp)
1227: texiabort(p, NULL);
1228: f->map = cp;
1229: }
1.8 kristaps 1230:
1.15 kristaps 1231: f->insplice += sz;
1.21 kristaps 1232: memmove(f->map + pos + sz, f->map + pos, f->mapsz - pos);
1233: memcpy(f->map + pos, buf, sz);
1.14 kristaps 1234: f->mapsz += sz;
1.8 kristaps 1235: }
1236:
1237: /*
1.1 kristaps 1238: * Parse a block sequence until we have the "@end endtoken" command
1239: * invocation.
1240: * This will return immediately at EOF.
1241: */
1242: void
1.14 kristaps 1243: parseto(struct texi *p, size_t *pos, const char *endtoken)
1.1 kristaps 1244: {
1.17 kristaps 1245: size_t end, sv;
1.7 kristaps 1246: enum texicmd cmd;
1247: size_t endtoksz;
1248: struct teximacro *macro;
1.1 kristaps 1249:
1250: endtoksz = strlen(endtoken);
1251: assert(endtoksz > 0);
1252:
1.14 kristaps 1253: while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
1254: switch (BUF(p)[*pos]) {
1.1 kristaps 1255: case ('}'):
1256: if (0 == p->ign)
1257: texiwarn(p, "unexpected \"}\"");
1.14 kristaps 1258: advance(p, pos);
1.1 kristaps 1259: continue;
1260: case ('{'):
1261: if (0 == p->ign)
1262: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1263: advance(p, pos);
1.1 kristaps 1264: continue;
1265: case ('@'):
1266: break;
1267: default:
1.14 kristaps 1268: parseword(p, pos, '\0');
1.1 kristaps 1269: continue;
1270: }
1271:
1.17 kristaps 1272: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1273: cmd = texicmd(p, *pos, &end, ¯o);
1274: advanceto(p, pos, end);
1.1 kristaps 1275: if (TEXICMD_END == cmd) {
1.14 kristaps 1276: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1277: advance(p, pos);
1.1 kristaps 1278: /*
1279: * FIXME: check the full word, not just its
1280: * initial substring!
1281: */
1.14 kristaps 1282: if (BUFSZ(p) - *pos >= endtoksz && 0 == strncmp
1283: (&BUF(p)[*pos], endtoken, endtoksz)) {
1284: advanceeoln(p, pos, 0);
1.1 kristaps 1285: break;
1286: }
1287: if (0 == p->ign)
1288: texiwarn(p, "unexpected \"end\"");
1.14 kristaps 1289: advanceeoln(p, pos, 0);
1.1 kristaps 1290: continue;
1.7 kristaps 1291: }
1292: if (NULL != macro)
1.17 kristaps 1293: texiexecmacro(p, macro, sv, pos);
1.7 kristaps 1294: if (TEXICMD__MAX == cmd)
1295: continue;
1296: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1297: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1298: }
1.30 kristaps 1299:
1300: if (*pos == BUFSZ(p))
1301: texiwarn(p, "EOF expecting \"%s\" end\n", endtoken);
1.1 kristaps 1302: }
1303:
1304: /*
1.12 kristaps 1305: * Like parsefile() but used for reading from stdandard input.
1306: * This can only be called for the first file!
1307: */
1308: void
1309: parsestdin(struct texi *p)
1310: {
1311: struct texifile *f;
1312: ssize_t ssz;
1313:
1314: assert(0 == p->filepos);
1315: f = &p->files[p->filepos];
1316: memset(f, 0, sizeof(struct texifile));
1317:
1318: f->type = TEXISRC_STDIN;
1319: f->name = "<stdin>";
1320:
1.14 kristaps 1321: for (f->mapsz = 0; ; f->mapsz += (size_t)ssz) {
1322: if (f->mapsz == f->mapmaxsz) {
1323: if (f->mapmaxsz == (1U << 31))
1.12 kristaps 1324: texierr(p, "stdin buffer too long");
1.14 kristaps 1325: f->mapmaxsz = f->mapmaxsz > 65536 / 2 ?
1326: 2 * f->mapmaxsz : 65536;
1327: f->map = realloc(f->map, f->mapmaxsz);
1.12 kristaps 1328: if (NULL == f->map)
1329: texiabort(p, NULL);
1330: }
1.14 kristaps 1331: ssz = read(STDIN_FILENO, f->map +
1332: (int)f->mapsz, f->mapmaxsz - f->mapsz);
1.12 kristaps 1333: if (0 == ssz)
1334: break;
1335: else if (-1 == ssz)
1336: texiabort(p, NULL);
1337: }
1338:
1339: p->filepos++;
1.14 kristaps 1340: parseeof(p);
1.12 kristaps 1341: texifilepop(p);
1342: }
1343:
1344: /*
1.1 kristaps 1345: * Memory-map the file "fname" and begin parsing it unless "parse" is
1346: * zero, in which case we just dump the file to stdout (making sure it
1347: * doesn't trip up mdoc(7) along the way).
1348: * This can be called in a nested context.
1349: */
1350: void
1351: parsefile(struct texi *p, const char *fname, int parse)
1352: {
1353: struct texifile *f;
1354: int fd;
1355: struct stat st;
1356: size_t i;
1.14 kristaps 1357: char *map;
1.1 kristaps 1358:
1.5 kristaps 1359: if (64 == p->filepos)
1.6 kristaps 1360: texierr(p, "too many open files");
1.1 kristaps 1361: f = &p->files[p->filepos];
1362: memset(f, 0, sizeof(struct texifile));
1363:
1.12 kristaps 1364: f->type = TEXISRC_FILE;
1.1 kristaps 1365: f->name = fname;
1366: if (-1 == (fd = open(fname, O_RDONLY, 0))) {
1367: texiabort(p, fname);
1368: } else if (-1 == fstat(fd, &st)) {
1369: close(fd);
1370: texiabort(p, fname);
1371: }
1372:
1.14 kristaps 1373: f->mapsz = f->mapmaxsz = st.st_size;
1374: map = mmap(NULL, f->mapsz,
1.1 kristaps 1375: PROT_READ, MAP_SHARED, fd, 0);
1376: close(fd);
1377:
1.14 kristaps 1378: if (MAP_FAILED == map)
1.1 kristaps 1379: texiabort(p, fname);
1380:
1381: if ( ! parse) {
1.13 kristaps 1382: for (i = 0; i < f->mapsz; i++)
1.14 kristaps 1383: texiputchar(p, map[i]);
1.13 kristaps 1384: if (p->outcol)
1385: texiputchar(p, '\n');
1.14 kristaps 1386: munmap(map, f->mapsz);
1387: return;
1388: }
1389:
1390: p->filepos++;
1391: f->map = malloc(f->mapsz);
1392: memcpy(f->map, map, f->mapsz);
1393: munmap(map, f->mapsz);
1394: parseeof(p);
1.1 kristaps 1395: texifilepop(p);
1396: }
1397:
1.2 kristaps 1398: /*
1399: * Look up the value to a stored pair's value starting in "buf" from
1400: * start to end.
1401: * Return the pointer to the value memory, which can be NULL if the
1402: * pointer key does not exist.
1403: * The pointer can point to NULL if the value has been unset.
1404: */
1405: static char **
1.14 kristaps 1406: valuequery(const struct texi *p, size_t start, size_t end)
1.2 kristaps 1407: {
1408: size_t i, sz, len;
1409:
1410: assert(end >= start);
1411: /* Ignore zero-length. */
1412: if (0 == (len = (end - start)))
1413: return(NULL);
1414: for (i = 0; i < p->valsz; i++) {
1415: sz = strlen(p->vals[i].key);
1416: if (sz != len)
1417: continue;
1.14 kristaps 1418: if (0 == strncmp(p->vals[i].key, &BUF(p)[start], len))
1.2 kristaps 1419: return(&p->vals[i].value);
1420: }
1421: return(NULL);
1422: }
1423:
1424: /*
1425: * Parse a key until the end of line, e.g., @clear foo\n, and return the
1426: * pointer to its value via valuequery().
1427: */
1428: static char **
1.14 kristaps 1429: valuelquery(struct texi *p, size_t *pos)
1.2 kristaps 1430: {
1431: size_t start, end;
1432: char **ret;
1433:
1.14 kristaps 1434: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1435: advance(p, pos);
1436: if (*pos == BUFSZ(p))
1.2 kristaps 1437: return(NULL);
1.14 kristaps 1438: for (start = end = *pos; end < BUFSZ(p); end++)
1439: if ('\n' == BUF(p)[end])
1.2 kristaps 1440: break;
1.14 kristaps 1441: advanceto(p, pos, end);
1442: if (*pos < BUFSZ(p)) {
1443: assert('\n' == BUF(p)[*pos]);
1444: advance(p, pos);
1.2 kristaps 1445: }
1.14 kristaps 1446: if (NULL == (ret = valuequery(p, start, end)))
1.2 kristaps 1447: return(NULL);
1448: return(ret);
1449: }
1450:
/*
 * Unset the value whose key occupies the remainder of the line: the
 * stored string is freed and its slot set to NULL.
 * Does nothing if the key cannot be found.
 */
void
valuelclear(struct texi *p, size_t *pos)
{
	char	**slot;

	slot = valuelquery(p, pos);
	if (slot != NULL) {
		free(*slot);
		*slot = NULL;
	}
}
1461:
/*
 * Return the value whose key occupies the remainder of the line.
 * The result is NULL if the key cannot be found or if its value has
 * been unset.
 */
const char *
valuellookup(struct texi *p, size_t *pos)
{
	char	**slot;

	slot = valuelquery(p, pos);
	return slot != NULL ? *slot : NULL;
}
1471:
1472: /*
1473: * Parse a key from a bracketed string, e.g., @value{foo}, and return
1474: * the pointer to its value.
1475: * If the returned pointer is NULL, either there was no string within
1476: * the brackets (or no brackets), or the value was not found, or the
1477: * value had previously been unset.
1478: */
1479: const char *
1.14 kristaps 1480: valueblookup(struct texi *p, size_t *pos)
1.2 kristaps 1481: {
1482: size_t start, end;
1483: char **ret;
1484:
1.14 kristaps 1485: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1486: advance(p, pos);
1487: if (*pos == BUFSZ(p) || '{' != BUF(p)[*pos])
1.2 kristaps 1488: return(NULL);
1.14 kristaps 1489: advance(p, pos);
1490: for (start = end = *pos; end < BUFSZ(p); end++)
1491: if ('}' == BUF(p)[end])
1.2 kristaps 1492: break;
1.14 kristaps 1493: advanceto(p, pos, end);
1494: if (*pos < BUFSZ(p)) {
1495: assert('}' == BUF(p)[*pos]);
1496: advance(p, pos);
1.2 kristaps 1497: }
1.14 kristaps 1498: if (NULL == (ret = valuequery(p, start, end)))
1.2 kristaps 1499: return(NULL);
1500: return(*ret);
1501: }
1502:
1503: void
1504: valueadd(struct texi *p, char *key, char *val)
1505: {
1506: size_t i;
1507:
1508: assert(NULL != key);
1509: assert(NULL != val);
1510:
1511: for (i = 0; i < p->valsz; i++)
1512: if (0 == strcmp(p->vals[i].key, key))
1513: break;
1514:
1515: if (i < p->valsz) {
1516: free(key);
1517: free(p->vals[i].value);
1518: p->vals[i].value = val;
1519: } else {
1.4 kristaps 1520: /* FIXME: reallocarray() */
1.2 kristaps 1521: p->vals = realloc(p->vals,
1522: (p->valsz + 1) *
1523: sizeof(struct texivalue));
1.4 kristaps 1524: if (NULL == p->vals)
1525: texiabort(p, NULL);
1.2 kristaps 1526: p->vals[p->valsz].key = key;
1527: p->vals[p->valsz].value = val;
1528: p->valsz++;
1529: }
1.7 kristaps 1530: }
1531:
1532: /*
1533: * Take the arguments to a macro, e.g., @foo{bar, baz, xyzzy} (or the
1534: * declaration form, @macro foo {arg1, ...}) and textually convert it to
1535: * an array of arguments of size "argsz".
1536: * These need to be freed individually and as a whole.
1537: * NOTE: this will puke on @, or @} macros, which can trick it into
1538: * stopping argument parsing earlier.
1539: * Ergo, textual: this doesn't interpret the arguments in any way.
1540: */
1541: char **
1.14 kristaps 1542: argparse(struct texi *p, size_t *pos, size_t *argsz, size_t hint)
1.7 kristaps 1543: {
1544: char **args;
1545: size_t start, end, stack;
1546:
1.14 kristaps 1547: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1548: advance(p, pos);
1.7 kristaps 1549:
1550: args = NULL;
1551: *argsz = 0;
1552:
1.17 kristaps 1553: if (*pos == BUFSZ(p))
1554: return(args);
1555:
1.14 kristaps 1556: if ('{' != BUF(p)[*pos] && hint) {
1.10 kristaps 1557: /*
1558: * Special case: if we encounter an unbracketed argument
1559: * and we're being invoked with non-zero arguments
1560: * (versus being set, i.e., hint>0), then parse until
1561: * the end of line.
1562: */
1563: *argsz = 1;
1564: args = calloc(1, sizeof(char *));
1565: if (NULL == args)
1566: texiabort(p, NULL);
1567: start = *pos;
1.14 kristaps 1568: while (*pos < BUFSZ(p)) {
1569: if ('\n' == BUF(p)[*pos])
1.10 kristaps 1570: break;
1.14 kristaps 1571: advance(p, pos);
1.10 kristaps 1572: }
1573: args[0] = malloc(*pos - start + 1);
1.14 kristaps 1574: memcpy(args[0], &BUF(p)[start], *pos - start);
1.10 kristaps 1575: args[0][*pos - start] = '\0';
1.14 kristaps 1576: if (*pos < BUFSZ(p) && '\n' == BUF(p)[*pos])
1577: advance(p, pos);
1.10 kristaps 1578: return(args);
1.14 kristaps 1579: } else if ('{' != BUF(p)[*pos])
1.7 kristaps 1580: return(args);
1.17 kristaps 1581:
1582: assert('{' == BUF(p)[*pos]);
1.7 kristaps 1583:
1584: /* Parse til the closing '}', putting into the array. */
1.14 kristaps 1585: advance(p, pos);
1586: while (*pos < BUFSZ(p)) {
1587: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1588: advance(p, pos);
1.7 kristaps 1589: start = *pos;
1590: stack = 0;
1.14 kristaps 1591: while (*pos < BUFSZ(p)) {
1.7 kristaps 1592: /*
1593: * According to the manual, commas within
1594: * embedded commands are escaped.
1595: * We keep track of embedded-ness in the "stack"
1596: * state anyway, so this is free.
1597: */
1.14 kristaps 1598: if (',' == BUF(p)[*pos] && 0 == stack && 1 != hint)
1.7 kristaps 1599: break;
1.14 kristaps 1600: else if (0 == stack && '}' == BUF(p)[*pos])
1.7 kristaps 1601: break;
1.14 kristaps 1602: else if (0 != stack && '}' == BUF(p)[*pos])
1.7 kristaps 1603: stack--;
1.14 kristaps 1604: else if ('{' == BUF(p)[*pos])
1.7 kristaps 1605: stack++;
1.14 kristaps 1606: advance(p, pos);
1.7 kristaps 1607: }
1608: if (stack)
1609: texiwarn(p, "unterminated macro "
1610: "in macro arguments");
1.14 kristaps 1611: if ((end = *pos) == BUFSZ(p))
1.7 kristaps 1612: break;
1613: /* Test for zero-length '{ }'. */
1.14 kristaps 1614: if (start == end && '}' == BUF(p)[*pos] && 0 == *argsz)
1.7 kristaps 1615: break;
1616: /* FIXME: use reallocarray. */
1617: args = realloc
1618: (args, sizeof(char *) *
1619: (*argsz + 1));
1620: if (NULL == args)
1621: texiabort(p, NULL);
1622: args[*argsz] = malloc(end - start + 1);
1623: if (NULL == args[*argsz])
1624: texiabort(p, NULL);
1625: memcpy(args[*argsz],
1.14 kristaps 1626: &BUF(p)[start], end - start);
1.7 kristaps 1627: args[*argsz][end - start] = '\0';
1628: (*argsz)++;
1.14 kristaps 1629: if ('}' == BUF(p)[*pos])
1.7 kristaps 1630: break;
1.14 kristaps 1631: advance(p, pos);
1.7 kristaps 1632: }
1633:
1.14 kristaps 1634: if (*pos == BUFSZ(p))
1.7 kristaps 1635: texierr(p, "unterminated arguments");
1.14 kristaps 1636: assert('}' == BUF(p)[*pos]);
1637: advance(p, pos);
1.7 kristaps 1638: return(args);
1.2 kristaps 1639: }
1.20 kristaps 1640:
1641: /*
1642: * If we're printing chapters, then do some naviation here and then
1643: * close our outfile.
1644: * I want to call this the SEE ALSO section, but that's not really what
1645: * it is: we'll refer to the "initial" (top) node and the next and
1646: * previous chapters.
1647: */
1648: void
1649: teximdocclose(struct texi *p, int last)
1650: {
1651: char buf[PATH_MAX];
1652:
1.32 kristaps 1653: if (NULL == p->chapters || 1 == p->nodesz)
1.20 kristaps 1654: return;
1655:
1656: teximacro(p, "Sh INFO NAVIGATION");
1657:
1658: /* Print a reference to the "top" node. */
1.32 kristaps 1659: if (-1 != p->nodecache[p->nodecur].up) {
1.22 kristaps 1660: texiputchars(p, "Top node,");
1.32 kristaps 1661: snprintf(buf, sizeof(buf), "%s-%zd 7",
1662: p->chapters, p->nodecache[p->nodecur].up);
1.31 kristaps 1663: p->seenvs = 0;
1.20 kristaps 1664: teximacroopen(p, "Xr ");
1665: texiputchars(p, buf);
1.22 kristaps 1666: texiputchars(p, " ;");
1.20 kristaps 1667: teximacroclose(p);
1668: }
1669:
1.32 kristaps 1670: if (-1 != p->nodecache[p->nodecur].prev) {
1.22 kristaps 1671: texiputchars(p, "previous node,");
1.32 kristaps 1672: snprintf(buf, sizeof(buf), "%s-%zd 7",
1673: p->chapters, p->nodecache[p->nodecur].prev);
1.31 kristaps 1674: p->seenvs = 0;
1.20 kristaps 1675: teximacroopen(p, "Xr ");
1676: texiputchars(p, buf);
1677: if ( ! last)
1.22 kristaps 1678: texiputchars(p, " ;");
1.20 kristaps 1679: teximacroclose(p);
1680: }
1681:
1.32 kristaps 1682: if (-1 != p->nodecache[p->nodecur].next) {
1683: texiputchars(p, "next node,");
1684: snprintf(buf, sizeof(buf), "%s-%zd 7",
1685: p->chapters, p->nodecache[p->nodecur].next);
1.31 kristaps 1686: p->seenvs = 0;
1.20 kristaps 1687: teximacroopen(p, "Xr ");
1688: texiputchars(p, buf);
1689: teximacroclose(p);
1690: }
1691:
1692: fclose(p->outfile);
1.32 kristaps 1693: p->outfile = NULL;
1694: }
1695:
1696: ssize_t
1697: texicache(struct texi *p, const char *buf, size_t sz)
1698: {
1699: size_t i;
1700:
1701: for (i = 0; i < p->nodecachesz; i++) {
1702: if (sz != strlen(p->nodecache[i].name))
1703: continue;
1704: if (strncmp(buf, p->nodecache[i].name, sz))
1705: continue;
1706: break;
1707: }
1708: if (i < p->nodecachesz)
1709: return(i);
1710: if (NULL == buf)
1711: return(-1);
1712: p->nodecache = realloc
1713: (p->nodecache,
1714: (p->nodecachesz + 1) * sizeof(struct texinode));
1715: if (NULL == p->nodecache)
1716: texiabort(p, NULL);
1717: p->nodecache[p->nodecachesz].name = malloc(sz + 1);
1718: if (NULL == p->nodecache[p->nodecachesz].name)
1719: texiabort(p, NULL);
1720: memcpy(p->nodecache[p->nodecachesz].name, buf, sz);
1721: p->nodecache[p->nodecachesz].name[sz] = '\0';
1722: p->nodecache[p->nodecachesz].up =
1723: p->nodecache[p->nodecachesz].next =
1724: p->nodecache[p->nodecachesz].prev = -1;
1725: p->nodecachesz++;
1726: return(p->nodecachesz - 1);
1.20 kristaps 1727: }
1728:
1729: /*
1.32 kristaps 1730: * Here we print our standard mdoc(7) prologue.
1731: * We use the title set with @settitle for the `Nd' description
1732: * and the source document filename (the first one as invoked on
1733: * the command line) for the title.
1734: * The date is set to the current date.
1.20 kristaps 1735: */
1736: void
1.21 kristaps 1737: teximdocopen(struct texi *p, size_t *pos)
1.20 kristaps 1738: {
1739: const char *cp;
1740: time_t t;
1741: char date[32];
1742:
1743: t = time(NULL);
1744: strftime(date, sizeof(date), "%F", localtime(&t));
1745:
1.30 kristaps 1746: p->seenvs = -1;
1.20 kristaps 1747: teximacroopen(p, "Dd");
1748: texiputchars(p, date);
1749: teximacroclose(p);
1750: teximacroopen(p, "Dt");
1751: for (cp = p->title; '\0' != *cp; cp++)
1752: texiputchar(p, toupper((unsigned int)*cp));
1753: texiputchars(p, " 7");
1754: teximacroclose(p);
1755: teximacro(p, "Os");
1756: teximacro(p, "Sh NAME");
1757: teximacroopen(p, "Nm");
1758: for (cp = p->title; '\0' != *cp; cp++)
1759: texiputchar(p, *cp);
1760: teximacroclose(p);
1761: teximacroopen(p, "Nd");
1.21 kristaps 1762: /*
1763: * The subtitle `Nd' can consist of arbitrary macros, so paste
1764: * it and parse to the end of the line.
1765: */
1766: if (NULL != p->subtitle) {
1767: texisplice(p, p->subtitle, strlen(p->subtitle), *pos);
1768: parseeoln(p, pos);
1769: } else
1.20 kristaps 1770: texiputchars(p, "Unknown description");
1771: teximacroclose(p);
1772: }
1773:
CVSweb