Annotation of texi2mdoc/util.c, Revision 1.36
1.36 ! schwarze 1: /* $Id: util.c,v 1.35 2018/11/13 08:45:29 schwarze Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2015 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/mman.h>
18: #include <sys/stat.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <limits.h>
24: #include <stdarg.h>
25: #include <stdio.h>
26: #include <stdlib.h>
27: #include <string.h>
28: #include <unistd.h>
29:
30: #include "extern.h"
31:
/*
 * NULL-terminated list of mdoc(7) macro names.
 * Every entry MUST be exactly two or three characters long.
 */
static const char *const mdocs[] = {
	"Ap",  "Dd",  "Dt",  "Os",  "Sh",  "Ss",  "Pp",  "D1",
	"Dl",  "Bd",  "Ed",  "Bl",  "El",  "It",  "Ad",  "An",
	"Ar",  "Cd",  "Cm",  "Dv",  "Er",  "Ev",  "Ex",  "Fa",
	"Fd",  "Fl",  "Fn",  "Ft",  "Ic",  "In",  "Li",  "Nd",
	"Nm",  "Op",  "Ot",  "Pa",  "Rv",  "St",  "Va",  "Vt",
	"Xr",  "%A",  "%B",  "%D",  "%I",  "%J",  "%N",  "%O",
	"%P",  "%R",  "%T",  "%V",  "Ac",  "Ao",  "Aq",  "At",
	"Bc",  "Bf",  "Bo",  "Bq",  "Bsx", "Bx",  "Db",  "Dc",
	"Do",  "Dq",  "Ec",  "Ef",  "Em",  "Eo",  "Fx",  "Ms",
	"No",  "Ns",  "Nx",  "Ox",  "Pc",  "Pf",  "Po",  "Pq",
	"Qc",  "Ql",  "Qo",  "Qq",  "Re",  "Rs",  "Sc",  "So",
	"Sq",  "Sm",  "Sx",  "Sy",  "Tn",  "Ux",  "Xc",  "Xo",
	"Fo",  "Fc",  "Oo",  "Oc",  "Bk",  "Ek",  "Bt",  "Hf",
	"Fr",  "Ud",  "Lb",  "Lp",  "Lk",  "Mt",  "Brq", "Bro",
	"Brc", "%C",  "Es",  "En",  "Dx",  "%Q",  "br",  "sp",
	"%U",  "Ta",  "ll",  NULL,
};
69:
70: /*
1.1 kristaps 71: * Unmap the top-most file in the stack of files currently opened (that
72: * is, nested calls to parsefile()).
73: */
74: void
75: texifilepop(struct texi *p)
76: {
77: struct texifile *f;
78:
79: assert(p->filepos > 0);
80: f = &p->files[--p->filepos];
1.14 kristaps 81: free(f->map);
1.1 kristaps 82: }
83:
1.7 kristaps 84: static void
85: teximacrofree(struct teximacro *p)
86: {
87: size_t i;
88:
89: for (i = 0; i < p->argsz; i++)
90: free(p->args[i]);
91:
92: free(p->args);
93: free(p->key);
94: free(p->value);
95: }
96:
97: static void
98: texivaluefree(struct texivalue *p)
99: {
100:
101: free(p->key);
102: free(p->value);
103: }
104:
1.30 kristaps 105: static void
106: texidex_free(struct texidex *p)
107: {
108: size_t i;
109:
110: for (i = 0; i < p->indexsz; i++)
1.31 kristaps 111: free(p->index[i].term);
1.30 kristaps 112:
113: free(p->index);
114: free(p->name);
115: p->index = NULL;
116: p->indexsz = 0;
117: }
118:
119: /*
120: * Add the text beginning at "index" and of "sz" bytes to the index
121: * named "tok" with name size "toksz".
122: * This will also output the necessary mdoc(7) to construct the index.
123: */
124: void
125: texindex(struct texi *p, const char *tok,
126: size_t toksz, const char *index, size_t sz)
127: {
1.31 kristaps 128: size_t i, isz;
1.30 kristaps 129: #ifdef HAVE_INDEX
130: char *cp;
131: #endif
132:
133: if (0 == sz) {
134: texiwarn(p, "zero-length index entry");
135: return;
136: }
137:
138: /* Look for the index. (Must be found.) */
139: for (i = 0; i < p->indexsz; i++) {
140: if (strlen(p->indexs[i].name) != toksz)
141: continue;
142: if (strncmp(p->indexs[i].name, tok, toksz))
143: continue;
144: break;
145: }
146:
147: assert(i < p->indexsz);
1.31 kristaps 148: isz = p->indexs[i].indexsz;
1.30 kristaps 149: /* Reallocate index's terms. */
150: p->indexs[i].index = realloc
151: (p->indexs[i].index,
1.31 kristaps 152: (isz + 1) * sizeof(struct texiterm));
1.30 kristaps 153: if (NULL == p->indexs[i].index)
154: texiabort(p, NULL);
155:
156: /* Add term to term array. */
1.32 kristaps 157: p->indexs[i].index[isz].chapter = p->nodecur;
1.31 kristaps 158: p->indexs[i].index[isz].term = malloc(sz + 1);
159: if (NULL == p->indexs[i].index[isz].term)
1.30 kristaps 160: texiabort(p, NULL);
1.31 kristaps 161: memcpy(p->indexs[i].index[isz].term, index, sz);
162: p->indexs[i].index[isz].term[sz] = '\0';
1.34 schwarze 163:
1.30 kristaps 164: /* Output mdoc(7) for index. */
165: #ifdef HAVE_INDEX
166: p->seenvs = -1;
167: teximacroopen(p, "Ix");
168: texiputchars(p, "idx");
169: texiputchars(p, p->indexs[i].name);
1.31 kristaps 170: cp = p->indexs[i].index[isz].term;
1.32 kristaps 171: while ('\n' != *cp)
1.30 kristaps 172: texiputchar(p, *cp++);
173: teximacroclose(p);
174: #endif
175: p->indexs[i].indexsz++;
176: }
177:
178: /*
179: * Add an index entry named "tok" of length "sz".
180: * This usually consists of two letters, e.g., "cp" or "vr".
181: * This does nothing if the index exists or is zero-sized.
182: */
183: void
184: texindex_add(struct texi *p, const char *tok, size_t sz)
185: {
186: size_t i;
187: char *cp;
188:
189: if (0 == sz)
190: return;
191:
192: /* Make sure we don't have a duplicate. */
193: for (i = 0; i < p->indexsz; i++) {
194: if (strlen(p->indexs[i].name) != sz)
195: continue;
196: if (strncmp(p->indexs[i].name, tok, sz))
197: continue;
198: return;
199: }
200:
201: /* Reallocate indices. */
1.34 schwarze 202: p->indexs = realloc(p->indexs,
203: sizeof(struct texidex) *
1.31 kristaps 204: (p->indexsz + 1));
1.30 kristaps 205: if (NULL == p->indexs)
206: texiabort(p, NULL);
207: if (NULL == (cp = malloc(sz + 1)))
208: texiabort(p, NULL);
209: memcpy(cp, tok, sz);
210: cp[sz] = '\0';
211: p->indexs[p->indexsz].name = cp;
212: p->indexs[p->indexsz].index = NULL;
213: p->indexs[p->indexsz].indexsz = 0;
214: p->indexsz++;
215: }
216:
1.1 kristaps 217: /*
218: * Unmap all files that we're currently using and free all resources
219: * that we've allocated during the parse.
220: * The utility should exit(...) after this is called.
221: */
222: void
223: texiexit(struct texi *p)
224: {
225: size_t i;
226:
227: /* Make sure we're newline-terminated. */
228: if (p->outcol)
1.20 kristaps 229: fputc('\n', p->outfile);
230: if (NULL != p->chapters)
231: teximdocclose(p, 1);
1.1 kristaps 232:
233: /* Unmap all files. */
234: while (p->filepos > 0)
235: texifilepop(p);
236:
1.7 kristaps 237: for (i = 0; i < p->macrosz; i++)
238: teximacrofree(&p->macros[i]);
1.1 kristaps 239: for (i = 0; i < p->dirsz; i++)
240: free(p->dirs[i]);
1.4 kristaps 241: for (i = 0; i < p->indexsz; i++)
1.30 kristaps 242: texidex_free(&p->indexs[i]);
1.34 schwarze 243: for (i = 0; i < p->valsz; i++)
1.7 kristaps 244: texivaluefree(&p->vals[i]);
1.4 kristaps 245:
1.32 kristaps 246: free(p->nodecache);
1.7 kristaps 247: free(p->macros);
1.1 kristaps 248: free(p->vals);
1.4 kristaps 249: free(p->indexs);
1.1 kristaps 250: free(p->dirs);
251: free(p->subtitle);
252: free(p->title);
1.26 kristaps 253: free(p->copying);
1.1 kristaps 254: }
255:
/*
 * Fatal system error.
 * Reports "errstring" through perror(3), releases all parse resources
 * with texiexit() and terminates with EXIT_FAILURE.
 * This function does not return.
 */
void
texiabort(struct texi *p, const char *errstring)
{

	perror(errstring);
	texiexit(p);
	exit(EXIT_FAILURE);
}
268:
269: /*
270: * Print a generic warning message (to stderr) tied to our current
271: * location in the parse sequence.
272: */
273: void
274: texiwarn(const struct texi *p, const char *fmt, ...)
275: {
1.34 schwarze 276: va_list ap;
1.15 kristaps 277: const struct texifile *f;
278:
279: f = &p->files[p->filepos - 1];
280:
281: if (f->insplice)
282: fprintf(stderr, "%s:%zu:%zu (%zuB left in splice): "
1.34 schwarze 283: "warning: ", f->name, f->line + 1,
1.15 kristaps 284: f->col + 1, f->insplice);
285: else
286: fprintf(stderr, "%s:%zu:%zu: warning: ",
287: f->name, f->line + 1, f->col + 1);
1.1 kristaps 288:
289: va_start(ap, fmt);
290: vfprintf(stderr, fmt, ap);
291: va_end(ap);
292: fputc('\n', stderr);
293: }
294:
295: /*
296: * Print an error message (to stderr) tied to our current location in
297: * the parse sequence, invoke texiexit(), then die.
298: */
299: void
300: texierr(struct texi *p, const char *fmt, ...)
301: {
1.34 schwarze 302: va_list ap;
1.15 kristaps 303: struct texifile *f;
304:
305: f = &p->files[p->filepos - 1];
306:
1.34 schwarze 307: if (f->insplice)
1.15 kristaps 308: fprintf(stderr, "%s:%zu:%zu: (%zuB left in splice): "
1.34 schwarze 309: "error: ", f->name, f->line + 1,
1.15 kristaps 310: f->col + 1, f->insplice);
311: else
312: fprintf(stderr, "%s:%zu:%zu: error: ",
313: f->name, f->line + 1, f->col + 1);
1.1 kristaps 314:
315: va_start(ap, fmt);
316: vfprintf(stderr, fmt, ap);
317: va_end(ap);
318: fputc('\n', stderr);
319: texiexit(p);
320: exit(EXIT_FAILURE);
321: }
322:
323: /*
324: * Put a single data character to the output if we're not ignoring.
1.13 kristaps 325: * Escape starting a line with a control character and slashes.
1.1 kristaps 326: */
327: void
328: texiputchar(struct texi *p, char c)
329: {
330:
331: if (p->ign)
332: return;
333: if ('.' == c && 0 == p->outcol)
1.20 kristaps 334: fputs("\\&", p->outfile);
1.10 kristaps 335: if ('\'' == c && 0 == p->outcol)
1.20 kristaps 336: fputs("\\&", p->outfile);
1.1 kristaps 337:
1.23 kristaps 338: if (p->uppercase)
339: fputc(toupper((unsigned int)c), p->outfile);
340: else
341: fputc(c, p->outfile);
1.13 kristaps 342: if ('\\' == c)
1.20 kristaps 343: fputc('e', p->outfile);
1.1 kristaps 344: if ('\n' == c) {
345: p->outcol = 0;
346: p->seenws = 0;
347: } else
348: p->outcol++;
349: }
350:
351: /*
1.13 kristaps 352: * Put an opaque series of characters.
353: * Characters starting a line with a control character are escaped, but
354: * that's it, so don't use this for non-controlled sequences of text.
1.1 kristaps 355: */
356: void
357: texiputchars(struct texi *p, const char *s)
358: {
359:
1.13 kristaps 360: if (p->ign)
361: return;
362: if ('.' == *s && 0 == p->outcol)
1.20 kristaps 363: fputs("\\&", p->outfile);
1.13 kristaps 364: if ('\'' == *s && 0 == p->outcol)
1.20 kristaps 365: fputs("\\&", p->outfile);
1.34 schwarze 366: if (p->uppercase)
1.23 kristaps 367: for ( ; '\0' != *s; s++)
368: p->outcol += fputc(toupper
369: ((unsigned int)*s), p->outfile);
370: else
371: p->outcol += fputs(s, p->outfile);
1.9 kristaps 372: }
373:
374: /*
375: * This puts all characters onto the output stream but makes sure to
376: * escape mdoc(7) slashes.
1.14 kristaps 377: * FIXME: useless.
1.9 kristaps 378: */
379: void
1.14 kristaps 380: texiputbuf(struct texi *p, size_t start, size_t end)
1.9 kristaps 381: {
382:
1.14 kristaps 383: for ( ; start < end; start++)
384: texiputchar(p, BUF(p)[start]);
1.1 kristaps 385: }
386:
387: /*
388: * Close an mdoc(7) macro opened with teximacroopen().
389: * If there are no more macros on the line, prints a newline.
390: */
391: void
392: teximacroclose(struct texi *p)
393: {
394:
1.30 kristaps 395: if (p->ign || p->literal|| TEXILIST_TABLE == p->list)
1.1 kristaps 396: return;
397:
398: if (0 == --p->outmacro) {
1.20 kristaps 399: fputc('\n', p->outfile);
1.1 kristaps 400: p->outcol = p->seenws = 0;
401: }
402: }
403:
404: /*
405: * Open a mdoc(7) macro.
406: * This is used for line macros, e.g., Qq [foo bar baz].
407: * It can be invoked for nested macros, e.g., Qq Li foo .
408: * TODO: flush-right punctuation (e.g., parenthesis).
409: */
410: void
411: teximacroopen(struct texi *p, const char *s)
412: {
413: int rc;
414:
1.30 kristaps 415: if (p->ign || p->literal|| TEXILIST_TABLE == p->list)
1.1 kristaps 416: return;
417:
418: if (p->outcol && 0 == p->outmacro) {
1.20 kristaps 419: fputc('\n', p->outfile);
1.1 kristaps 420: p->outcol = 0;
421: }
422:
1.30 kristaps 423: if (p->seenvs > 0 && 0 == p->outmacro)
424: fputs(".Pp\n", p->outfile);
425:
1.1 kristaps 426: if (0 == p->outmacro)
1.20 kristaps 427: fputc('.', p->outfile);
1.1 kristaps 428: else
1.20 kristaps 429: fputc(' ', p->outfile);
1.1 kristaps 430:
1.20 kristaps 431: if (EOF != (rc = fputs(s, p->outfile)))
1.1 kristaps 432: p->outcol += rc;
433:
1.20 kristaps 434: fputc(' ', p->outfile);
1.1 kristaps 435: p->outcol++;
436: p->outmacro++;
1.30 kristaps 437: p->seenws = p->seenvs = 0;
1.1 kristaps 438: }
439:
440: /*
441: * Put a stadnalone mdoc(7) command with the trailing newline.
442: */
443: void
444: teximacro(struct texi *p, const char *s)
445: {
446:
447: if (p->ign)
448: return;
449:
450: if (p->outmacro)
451: texierr(p, "\"%s\" in open line scope!?", s);
452: if (p->literal)
453: texierr(p, "\"%s\" in a literal scope!?", s);
454: if (p->outcol)
1.20 kristaps 455: fputc('\n', p->outfile);
1.30 kristaps 456: if (p->seenvs > 0)
457: fputs(".Pp\n", p->outfile);
1.1 kristaps 458:
1.20 kristaps 459: fputc('.', p->outfile);
460: fputs(s, p->outfile);
461: fputc('\n', p->outfile);
1.1 kristaps 462: p->outcol = p->seenws = 0;
463: }
464:
465: /*
466: * Introduce vertical space during normal (non-macro) input.
467: */
468: void
469: texivspace(struct texi *p)
470: {
471:
1.30 kristaps 472: if (TEXILIST_TABLE != p->list && p->seenvs >= 0)
473: p->seenvs = 1;
1.1 kristaps 474: }
475:
476: /*
477: * Advance by a single byte in the input stream, adjusting our location
478: * in the current input file.
479: */
480: void
1.14 kristaps 481: advance(struct texi *p, size_t *pos)
1.1 kristaps 482: {
1.15 kristaps 483: struct texifile *f;
1.1 kristaps 484:
1.15 kristaps 485: f = &p->files[p->filepos - 1];
486:
487: if (0 == f->insplice) {
488: if ('\n' == BUF(p)[*pos]) {
489: f->line++;
490: f->col = 0;
491: } else
492: f->col++;
1.17 kristaps 493: } else {
1.15 kristaps 494: --f->insplice;
1.17 kristaps 495: if (0 == f->insplice)
496: f->depth = 0;
497: }
1.1 kristaps 498:
499: (*pos)++;
500: }
501:
502: /*
503: * It's common to wait punctuation to float on the right side of macro
504: * lines in mdoc(7), e.g., ".Em hello ) ."
505: * This function does so, and should be called before teximacroclose().
506: * It will detect that it's the last in the nested macros and
507: * appropriately flush-left punctuation alongside the macro.
508: */
509: void
1.14 kristaps 510: texipunctuate(struct texi *p, size_t *pos)
1.1 kristaps 511: {
512: size_t start, end;
513:
514: if (1 != p->outmacro)
515: return;
516:
1.14 kristaps 517: for (start = end = *pos; end < BUFSZ(p); end++) {
518: switch (BUF(p)[end]) {
1.1 kristaps 519: case (','):
520: case (')'):
521: case ('.'):
522: case ('"'):
523: case (':'):
1.22 kristaps 524: case (';'):
1.1 kristaps 525: case ('!'):
526: case ('?'):
527: continue;
528: default:
529: break;
530: }
531: break;
532: }
533: if (end == *pos)
534: return;
1.34 schwarze 535: if (end + 1 == BUFSZ(p) || ' ' == BUF(p)[end] ||
1.30 kristaps 536: '@' == BUF(p)[end] || '\n' == BUF(p)[end]) {
1.1 kristaps 537: for ( ; start < end; start++) {
538: texiputchar(p, ' ');
1.14 kristaps 539: texiputchar(p, BUF(p)[start]);
540: advance(p, pos);
1.1 kristaps 541: }
542: }
543: }
544:
545: /*
546: * Advance to the next non-whitespace word in the input stream.
547: * If we're in literal mode, then print all of the whitespace as we're
548: * doing so.
549: */
550: static size_t
1.14 kristaps 551: advancenext(struct texi *p, size_t *pos)
1.1 kristaps 552: {
1.34 schwarze 553:
1.1 kristaps 554: if (p->literal) {
1.14 kristaps 555: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos])) {
556: texiputchar(p, BUF(p)[*pos]);
557: advance(p, pos);
1.1 kristaps 558: }
559: return(*pos);
1.34 schwarze 560: }
1.1 kristaps 561:
1.14 kristaps 562: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos])) {
1.1 kristaps 563: p->seenws = 1;
1.30 kristaps 564: if (0 == p->seenvs && '\n' == BUF(p)[*pos])
565: if (*pos + 1 < BUFSZ(p) && '\n' == BUF(p)[*pos + 1])
566: p->seenvs = 1;
1.14 kristaps 567: advance(p, pos);
1.1 kristaps 568: }
569: return(*pos);
570: }
571:
572: /*
573: * Advance to the EOLN in the input stream.
1.22 kristaps 574: * This will skip over '@' markers in an effort to ignore escaped
575: * newlines.
1.1 kristaps 576: */
577: size_t
1.14 kristaps 578: advanceeoln(struct texi *p, size_t *pos, int consumenl)
1.1 kristaps 579: {
580:
1.22 kristaps 581: while (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos]) {
582: if ('@' == BUF(p)[*pos])
583: advance(p, pos);
1.33 kristaps 584: if (*pos < BUFSZ(p))
585: advance(p, pos);
1.22 kristaps 586: }
1.14 kristaps 587: if (*pos < BUFSZ(p) && consumenl)
588: advance(p, pos);
1.1 kristaps 589: return(*pos);
590: }
591:
/*
 * Advance byte by byte to "end", an absolute position in the current
 * buffer that must not lie before the current position.
 */
void
advanceto(struct texi *p, size_t *pos, size_t end)
{
	size_t	 left;

	assert(*pos <= end);
	for (left = end - *pos; left > 0; left--)
		advance(p, pos);
}
604:
1.7 kristaps 605: static void
1.17 kristaps 606: texiexecmacro(struct texi *p, struct teximacro *m, size_t sv, size_t *pos)
1.7 kristaps 607: {
1.34 schwarze 608: size_t valsz, realsz, aasz, asz,
1.11 kristaps 609: ssz, i, j, k, start, end;
610: char *val;
611: char **args;
612: const char *cp;
1.7 kristaps 613:
1.17 kristaps 614: /* Disregard empty macros. */
1.22 kristaps 615: if (0 == (valsz = realsz = strlen(m->value))) {
616: args = argparse(p, pos, &asz, m->argsz);
617: for (i = 0; i < asz; i++)
618: free(args[i]);
619: free(args);
1.17 kristaps 620: return;
1.22 kristaps 621: }
1.17 kristaps 622:
623: /*
624: * This is important: it protect us from macros that invoke more
625: * macros, possibly going on infinitely.
626: * We use "sv" instead of the current position because we might
627: * be invoked at the end of the macro (i.e., insplice == 0).
628: * The "sv" value was initialised at the start of the macro.
629: */
630: if (sv > 0)
1.24 kristaps 631: if (++p->files[p->filepos - 1].depth > 64)
1.17 kristaps 632: texierr(p, "maximium recursive depth");
633:
1.14 kristaps 634: args = argparse(p, pos, &asz, m->argsz);
1.7 kristaps 635: if (asz != m->argsz)
636: texiwarn(p, "invalid macro argument length");
637: aasz = asz < m->argsz ? asz : m->argsz;
638:
639: if (0 == aasz) {
1.21 kristaps 640: texisplice(p, m->value, valsz, *pos);
1.7 kristaps 641: return;
642: }
643:
644: val = strdup(m->value);
645:
646: for (i = j = 0; i < realsz; i++) {
647: /* Parse blindly til the backslash delimiter. */
648: if ('\\' != m->value[i]) {
649: val[j++] = m->value[i];
650: val[j] = '\0';
651: continue;
652: } else if (i == realsz - 1)
653: texierr(p, "trailing argument name delimiter");
654:
655: /* Double-backslash is escaped. */
656: if ('\\' == m->value[i + 1]) {
657: val[j++] = m->value[i++];
658: val[j] = '\0';
659: continue;
660: }
661:
662: assert('\\' == m->value[i] && i < realsz - 1);
663:
664: /* Parse to terminating delimiter. */
665: /* FIXME: embedded, escaped delimiters? */
1.34 schwarze 666: for (start = end = i + 1; end < realsz; end++)
1.7 kristaps 667: if ('\\' == m->value[end])
668: break;
669: if (end == realsz)
670: texierr(p, "unterminated argument name");
671:
672: for (k = 0; k < aasz; k++) {
673: if ((ssz = strlen(m->args[k])) != (end - start))
674: continue;
675: if (strncmp(&m->value[start], m->args[k], ssz))
676: continue;
677: break;
678: }
679:
1.34 schwarze 680: /*
681: * Argument didn't exist in argument table.
1.14 kristaps 682: * Just ignore it.
1.7 kristaps 683: */
684: if (k == aasz) {
1.14 kristaps 685: i = end;
1.7 kristaps 686: continue;
687: }
688:
689: if (strlen(args[k]) > ssz) {
690: valsz += strlen(args[k]);
691: val = realloc(val, valsz + 1);
692: if (NULL == val)
693: texiabort(p, NULL);
694: }
695:
1.34 schwarze 696: for (cp = args[k]; '\0' != *cp; cp++)
1.11 kristaps 697: val[j++] = *cp;
698:
699: val[j] = '\0';
1.7 kristaps 700: i = end;
701: }
702:
1.21 kristaps 703: texisplice(p, val, strlen(val), *pos);
1.7 kristaps 704:
705: for (i = 0; i < asz; i++)
706: free(args[i]);
707: free(args);
708: free(val);
1.34 schwarze 709: }
1.7 kristaps 710:
1.1 kristaps 711: /*
712: * Output a free-form word in the input stream, progressing to the next
713: * command or white-space.
714: * This also will advance the input stream.
715: */
716: static void
1.14 kristaps 717: parseword(struct texi *p, size_t *pos, char extra)
1.1 kristaps 718: {
1.34 schwarze 719: size_t i, end, len;
720: int c;
1.1 kristaps 721:
1.25 kristaps 722: /*
1.27 kristaps 723: * If a prior word had a terminating double-newline, then begin
724: * this text block with a `Pp'.
725: * We don't do this if we're in a literal context (we'll print
726: * out the newlines themselves) nor in a `TS' table.
727: */
1.30 kristaps 728: if (p->seenvs > 0 && 0 == p->literal && TEXILIST_TABLE != p->list) {
729: if (p->outcol > 0)
730: fputc('\n', p->outfile);
1.36 ! schwarze 731: if (p->ign == 0)
! 732: fputs(".Pp\n", p->outfile);
1.30 kristaps 733: p->outcol = 0;
734: }
1.27 kristaps 735:
736: /*
1.25 kristaps 737: * Some line control: if we (non-macro, non-literal) already
738: * have more than 72 characters written to the screen, then
739: * output a newline before getting started.
740: */
1.34 schwarze 741: if (p->seenws && 0 == p->outmacro &&
1.1 kristaps 742: p->outcol > 72 && 0 == p->literal)
743: texiputchar(p, '\n');
1.25 kristaps 744:
745: /* Usual padding in the case of seen whitespace. */
1.1 kristaps 746: if (p->seenws && p->outcol && 0 == p->literal)
747: texiputchar(p, ' ');
748:
749: p->seenws = 0;
1.29 kristaps 750:
751: /*
752: * If we're in a macro line, we might want to print text that
753: * happens to be the same as an mdoc(7) macro.
754: * Obviously, we need to escape these words.
755: */
756: if (p->outmacro) {
757: end = *pos;
758: /* Read ahead to get the word length. */
759: while (end < BUFSZ(p) && ! ismspace(BUF(p)[end])) {
760: switch ((c = BUF(p)[end])) {
761: case ('@'):
762: case ('}'):
763: case ('{'):
764: break;
765: default:
766: if ('\0' != extra && extra == c)
767: break;
768: end++;
769: continue;
770: }
771: break;
772: }
773: len = end - *pos;
774: /* See if we have a match. */
775: for (i = 0; NULL != mdocs[i]; i++) {
776: /* All macros are 2 or three letters. */
777: if (len < 2 || len > 3)
778: continue;
779: /* Check the macro word length. */
780: if ('\0' == mdocs[i][2] && 2 != len)
781: continue;
782: else if ('\0' == mdocs[i][3] && 3 != len)
783: continue;
784: if (strncmp(mdocs[i], &BUF(p)[*pos], len))
785: continue;
786: texiputchars(p, "\\&");
787: break;
788: }
789: }
1.1 kristaps 790:
1.14 kristaps 791: while (*pos < BUFSZ(p) && ! ismspace(BUF(p)[*pos])) {
792: switch (BUF(p)[*pos]) {
1.1 kristaps 793: case ('@'):
794: case ('}'):
795: case ('{'):
796: return;
797: }
1.14 kristaps 798: if ('\0' != extra && BUF(p)[*pos] == extra)
1.1 kristaps 799: return;
1.28 kristaps 800:
801: if (p->literal) {
802: texiputchar(p, BUF(p)[*pos]);
803: advance(p, pos);
804: continue;
805: }
806:
1.30 kristaps 807: if ('"' == BUF(p)[*pos]) {
808: texiputchars(p, "\\(dq");
1.34 schwarze 809: } else if (*pos < BUFSZ(p) - 2 &&
810: '-' == BUF(p)[*pos] &&
811: '-' == BUF(p)[*pos + 1] &&
1.28 kristaps 812: '-' == BUF(p)[*pos + 2]) {
813: texiputchars(p, "\\(em");
814: advance(p, pos);
815: advance(p, pos);
1.34 schwarze 816: } else if (*pos < BUFSZ(p) - 1 &&
817: '-' == BUF(p)[*pos] &&
1.28 kristaps 818: '-' == BUF(p)[*pos + 1]) {
819: texiputchars(p, "\\(en");
820: advance(p, pos);
1.34 schwarze 821: } else if (*pos < BUFSZ(p) - 1 &&
822: '`' == BUF(p)[*pos] &&
1.14 kristaps 823: '`' == BUF(p)[*pos + 1]) {
1.1 kristaps 824: texiputchars(p, "\\(lq");
1.14 kristaps 825: advance(p, pos);
1.34 schwarze 826: } else if (*pos < BUFSZ(p) - 1 &&
827: '\'' == BUF(p)[*pos] &&
1.14 kristaps 828: '\'' == BUF(p)[*pos + 1]) {
1.1 kristaps 829: texiputchars(p, "\\(rq");
1.14 kristaps 830: advance(p, pos);
1.1 kristaps 831: } else
1.14 kristaps 832: texiputchar(p, BUF(p)[*pos]);
1.28 kristaps 833:
1.14 kristaps 834: advance(p, pos);
1.1 kristaps 835: }
1.25 kristaps 836:
1.34 schwarze 837: /*
1.25 kristaps 838: * New sentence, new line:if we (non-macro, non-literal) see a
839: * period at the end of the last printed word, then open a
840: * newline.
841: */
1.30 kristaps 842: if (0 == p->literal && 0 == p->outmacro && *pos < BUFSZ(p))
843: switch (BUF(p)[*pos - 1]) {
844: case ('.'):
845: case ('!'):
846: case ('?'):
847: texiputchar(p, '\n');
848: break;
849: default:
850: break;
851: }
852:
853: p->seenvs = 0;
1.1 kristaps 854: }
855:
856: /*
857: * Look up the command at position "pos" in the buffer, returning it (or
858: * TEXICMD__MAX if none found) and setting "end" to be the absolute
859: * index after the command name.
860: */
861: enum texicmd
1.19 kristaps 862: texicmd(const struct texi *p, size_t pos, size_t *end, struct teximacro **macro)
1.1 kristaps 863: {
1.4 kristaps 864: size_t i, len, toksz;
1.1 kristaps 865:
1.14 kristaps 866: assert('@' == BUF(p)[pos]);
1.1 kristaps 867:
1.7 kristaps 868: if (NULL != macro)
869: *macro = NULL;
870:
1.14 kristaps 871: if ((*end = pos) == BUFSZ(p))
1.1 kristaps 872: return(TEXICMD__MAX);
1.14 kristaps 873: else if ((*end = ++pos) == BUFSZ(p))
1.1 kristaps 874: return(TEXICMD__MAX);
875:
876: /* Alphabetic commands are special. */
1.23 kristaps 877: if ( ! isalpha((unsigned int)BUF(p)[pos])) {
1.14 kristaps 878: if ((*end = pos + 1) == BUFSZ(p))
1.1 kristaps 879: return(TEXICMD__MAX);
880: for (i = 0; i < TEXICMD__MAX; i++) {
881: if (1 != texitoks[i].len)
882: continue;
1.14 kristaps 883: if (0 == strncmp(texitoks[i].tok, &BUF(p)[pos], 1))
1.1 kristaps 884: return(i);
885: }
1.14 kristaps 886: texiwarn(p, "bad command: @%c", BUF(p)[pos]);
1.1 kristaps 887: return(TEXICMD__MAX);
888: }
889:
1.4 kristaps 890: /* Scan to the end of the possible command name. */
1.14 kristaps 891: for (*end = pos; *end < BUFSZ(p) && ! ismspace(BUF(p)[*end]); (*end)++)
1.34 schwarze 892: if ((*end > pos && ('@' == BUF(p)[*end] ||
1.14 kristaps 893: '{' == BUF(p)[*end] || '}' == BUF(p)[*end])))
1.1 kristaps 894: break;
895:
1.4 kristaps 896: /* Look for the command. */
1.1 kristaps 897: len = *end - pos;
898: for (i = 0; i < TEXICMD__MAX; i++) {
899: if (len != texitoks[i].len)
900: continue;
1.14 kristaps 901: if (0 == strncmp(texitoks[i].tok, &BUF(p)[pos], len))
1.1 kristaps 902: return(i);
903: }
904:
1.4 kristaps 905: /* Look for it in our indices. */
906: for (i = 0; i < p->indexsz; i++) {
1.30 kristaps 907: toksz = strlen(p->indexs[i].name);
1.4 kristaps 908: if (len != 5 + toksz)
909: continue;
1.30 kristaps 910: if (strncmp(&BUF(p)[pos], p->indexs[i].name, toksz))
1.4 kristaps 911: continue;
1.14 kristaps 912: if (0 == strncmp(&BUF(p)[pos + toksz], "index", 5))
1.7 kristaps 913: return(TEXICMD_USER_INDEX);
914: }
915:
916: for (i = 0; i < p->macrosz; i++) {
917: if (len != strlen(p->macros[i].key))
918: continue;
1.14 kristaps 919: if (strncmp(&BUF(p)[pos], p->macros[i].key, len))
1.7 kristaps 920: continue;
921: if (NULL != macro)
922: *macro = &p->macros[i];
923: return(TEXICMD__MAX);
1.4 kristaps 924: }
925:
1.14 kristaps 926: texiwarn(p, "bad command: @%.*s", (int)len, &BUF(p)[pos]);
1.1 kristaps 927: return(TEXICMD__MAX);
928: }
929:
930: /*
931: * Parse an argument from a bracketed command, e.g., @url{foo, baz}.
932: * Num should be set to the argument we're currently parsing, although
933: * it suffixes for it to be zero or non-zero.
934: * This will return 1 if there are more arguments, 0 otherwise.
935: * This will stop (returning 0) in the event of EOF or if we're not at a
936: * bracket for the zeroth parse.
937: */
938: int
1.14 kristaps 939: parsearg(struct texi *p, size_t *pos, size_t num)
1.1 kristaps 940: {
1.17 kristaps 941: size_t end, sv;
1.7 kristaps 942: enum texicmd cmd;
943: struct teximacro *macro;
1.1 kristaps 944:
1.14 kristaps 945: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos]))
946: advance(p, pos);
947: if (*pos == BUFSZ(p) || (0 == num && '{' != BUF(p)[*pos]))
1.1 kristaps 948: return(0);
949: if (0 == num)
1.14 kristaps 950: advance(p, pos);
1.1 kristaps 951:
1.14 kristaps 952: while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
953: switch (BUF(p)[*pos]) {
1.1 kristaps 954: case (','):
1.14 kristaps 955: advance(p, pos);
1.1 kristaps 956: return(1);
957: case ('}'):
1.14 kristaps 958: advance(p, pos);
1.1 kristaps 959: return(0);
960: case ('{'):
961: if (0 == p->ign)
962: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 963: advance(p, pos);
1.1 kristaps 964: continue;
965: case ('@'):
966: break;
967: default:
1.14 kristaps 968: parseword(p, pos, ',');
1.1 kristaps 969: continue;
970: }
971:
1.17 kristaps 972: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 973: cmd = texicmd(p, *pos, &end, ¯o);
974: advanceto(p, pos, end);
1.7 kristaps 975: if (NULL != macro)
1.17 kristaps 976: texiexecmacro(p, macro, sv, pos);
1.34 schwarze 977: if (TEXICMD__MAX == cmd)
1.1 kristaps 978: continue;
979: if (NULL != texitoks[cmd].fp)
1.14 kristaps 980: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 981: }
982: return(0);
983: }
984:
985: /*
986: * Parse until the end of a bracketed statement, e.g., @foo{bar baz}.
987: * This will stop in the event of EOF or if we're not at a bracket.
988: */
989: void
1.18 kristaps 990: parsebracket(struct texi *p, size_t *pos, int dostack)
1.1 kristaps 991: {
1.18 kristaps 992: size_t end, sv, stack;
1.7 kristaps 993: enum texicmd cmd;
994: struct teximacro *macro;
1.1 kristaps 995:
1.14 kristaps 996: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos]))
997: advance(p, pos);
1.1 kristaps 998:
1.14 kristaps 999: if (*pos == BUFSZ(p) || '{' != BUF(p)[*pos])
1.1 kristaps 1000: return;
1.14 kristaps 1001: advance(p, pos);
1.1 kristaps 1002:
1.18 kristaps 1003: stack = 0;
1.14 kristaps 1004: while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
1005: switch (BUF(p)[*pos]) {
1.1 kristaps 1006: case ('}'):
1.18 kristaps 1007: if (stack > 0) {
1008: stack--;
1009: advance(p, pos);
1010: texiputchar(p, '}');
1011: continue;
1012: }
1.14 kristaps 1013: advance(p, pos);
1.1 kristaps 1014: return;
1015: case ('{'):
1.18 kristaps 1016: if (dostack) {
1017: stack++;
1018: advance(p, pos);
1019: texiputchar(p, '{');
1020: continue;
1021: }
1.1 kristaps 1022: if (0 == p->ign)
1023: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1024: advance(p, pos);
1.1 kristaps 1025: continue;
1026: case ('@'):
1027: break;
1028: default:
1.14 kristaps 1029: parseword(p, pos, '\0');
1.1 kristaps 1030: continue;
1031: }
1032:
1.17 kristaps 1033: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1034: cmd = texicmd(p, *pos, &end, ¯o);
1035: advanceto(p, pos, end);
1.7 kristaps 1036: if (NULL != macro)
1.17 kristaps 1037: texiexecmacro(p, macro, sv, pos);
1.34 schwarze 1038: if (TEXICMD__MAX == cmd)
1.1 kristaps 1039: continue;
1040: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1041: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1042: }
1043: }
1044:
1045: /*
1046: * This should be invoked when we're on a macro line and want to process
1047: * to the end of the current input line, doing all of our macros along
1048: * the way.
1049: */
1050: void
1.14 kristaps 1051: parseeoln(struct texi *p, size_t *pos)
1.1 kristaps 1052: {
1.17 kristaps 1053: size_t end, sv;
1.7 kristaps 1054: enum texicmd cmd;
1055: struct teximacro *macro;
1.1 kristaps 1056:
1.14 kristaps 1057: while (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos]) {
1058: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) {
1.1 kristaps 1059: p->seenws = 1;
1060: if (p->literal)
1.14 kristaps 1061: texiputchar(p, BUF(p)[*pos]);
1062: advance(p, pos);
1.33 kristaps 1063: }
1064: if (*pos == BUFSZ(p)) {
1065: texiwarn(p, "unexpected EOF");
1066: return;
1.1 kristaps 1067: }
1.14 kristaps 1068: switch (BUF(p)[*pos]) {
1.1 kristaps 1069: case ('}'):
1070: if (0 == p->ign)
1071: texiwarn(p, "unexpected \"}\"");
1.14 kristaps 1072: advance(p, pos);
1.1 kristaps 1073: continue;
1074: case ('{'):
1075: if (0 == p->ign)
1076: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1077: advance(p, pos);
1.1 kristaps 1078: continue;
1.30 kristaps 1079: case ('\n'):
1080: continue;
1.1 kristaps 1081: case ('@'):
1082: break;
1083: default:
1.14 kristaps 1084: parseword(p, pos, '\0');
1.1 kristaps 1085: continue;
1086: }
1087:
1.17 kristaps 1088: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1089: cmd = texicmd(p, *pos, &end, ¯o);
1090: advanceto(p, pos, end);
1.7 kristaps 1091: if (NULL != macro)
1.17 kristaps 1092: texiexecmacro(p, macro, sv, pos);
1.34 schwarze 1093: if (TEXICMD__MAX == cmd)
1.1 kristaps 1094: continue;
1095: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1096: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1097: }
1.14 kristaps 1098:
1099: if (*pos < BUFSZ(p) && '\n' == BUF(p)[*pos])
1100: advance(p, pos);
1.19 kristaps 1101: }
1102:
1.30 kristaps 1103: enum texicmd
1104: peeklinecmd(const struct texi *p, size_t pos)
1105: {
1106: size_t end;
1107:
1108: while (pos < BUFSZ(p) && isws(BUF(p)[pos]))
1109: pos++;
1110: if (pos == BUFSZ(p) || '@' != BUF(p)[pos])
1111: return(TEXICMD__MAX);
1112: return(texicmd(p, pos, &end, NULL));
1113: }
1114:
1.19 kristaps 1115: /*
1116: * Peek to see if there's a command after subsequent whitespace.
1117: * If so, return the macro identifier.
1118: * This DOES NOT work with user-defined macros.
1119: */
1120: enum texicmd
1121: peekcmd(const struct texi *p, size_t pos)
1122: {
1123: size_t end;
1124:
1125: while (pos < BUFSZ(p) && ismspace(BUF(p)[pos]))
1126: pos++;
1127: if (pos == BUFSZ(p) || '@' != BUF(p)[pos])
1128: return(TEXICMD__MAX);
1129: return(texicmd(p, pos, &end, NULL));
1.1 kristaps 1130: }
1131:
1132: /*
1133: * Parse a single word or command.
1134: * This will return immediately at the EOF.
1135: */
1.32 kristaps 1136: void
1.14 kristaps 1137: parsesingle(struct texi *p, size_t *pos)
1.1 kristaps 1138: {
1.17 kristaps 1139: size_t end, sv;
1.7 kristaps 1140: enum texicmd cmd;
1141: struct teximacro *macro;
1.1 kristaps 1142:
1.14 kristaps 1143: if ((*pos = advancenext(p, pos)) >= BUFSZ(p))
1.1 kristaps 1144: return;
1145:
1.14 kristaps 1146: switch (BUF(p)[*pos]) {
1.1 kristaps 1147: case ('}'):
1148: if (0 == p->ign)
1149: texiwarn(p, "unexpected \"}\"");
1.14 kristaps 1150: advance(p, pos);
1.1 kristaps 1151: return;
1152: case ('{'):
1153: if (0 == p->ign)
1154: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1155: advance(p, pos);
1.1 kristaps 1156: return;
1157: case ('@'):
1158: break;
1159: default:
1.14 kristaps 1160: parseword(p, pos, '\0');
1.1 kristaps 1161: return;
1162: }
1163:
1.17 kristaps 1164: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1165: cmd = texicmd(p, *pos, &end, ¯o);
1166: advanceto(p, pos, end);
1.7 kristaps 1167: if (NULL != macro)
1.17 kristaps 1168: texiexecmacro(p, macro, sv, pos);
1.34 schwarze 1169: if (TEXICMD__MAX == cmd)
1.1 kristaps 1170: return;
1171: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1172: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1173: }
1174:
1175: /*
1176: * This is used in the @deffn type of command.
1177: * These have an arbitrary number of line arguments; however, these
1178: * arguments may or may not be surrounded by brackets.
1179: * In this function, we parse each one as either a bracketed or
1180: * non-bracketed argument, returning 0 when we've reached the end of
1181: * line or 1 otherwise.
1182: */
1183: int
1.14 kristaps 1184: parselinearg(struct texi *p, size_t *pos)
1.1 kristaps 1185: {
1186:
1.14 kristaps 1187: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) {
1.1 kristaps 1188: p->seenws = 1;
1.14 kristaps 1189: advance(p, pos);
1.1 kristaps 1190: }
1191:
1.14 kristaps 1192: if (*pos < BUFSZ(p) && '{' == BUF(p)[*pos])
1.18 kristaps 1193: parsebracket(p, pos, 0);
1.14 kristaps 1194: else if (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos])
1195: parsesingle(p, pos);
1.1 kristaps 1196: else
1197: return(0);
1198:
1199: return(1);
1200: }
1201:
/*
 * Parse the current buffer from its beginning, one item at a time,
 * until the parse point reaches the end of the buffer.
 */
static void
parseeof(struct texi *p)
{
	size_t	 pos = 0;

	while (pos < BUFSZ(p))
		parsesingle(p, &pos);
}
1213:
1.8 kristaps 1214: void
1.21 kristaps 1215: texisplice(struct texi *p, const char *buf, size_t sz, size_t pos)
1.8 kristaps 1216: {
1.14 kristaps 1217: char *cp;
1218: struct texifile *f;
1.8 kristaps 1219:
1.14 kristaps 1220: assert(p->filepos > 0);
1221: f = &p->files[p->filepos - 1];
1.8 kristaps 1222:
1.14 kristaps 1223: if (f->mapsz + sz > f->mapmaxsz) {
1224: f->mapmaxsz = f->mapsz + sz + 1024;
1225: cp = realloc(f->map, f->mapmaxsz);
1226: if (NULL == cp)
1227: texiabort(p, NULL);
1228: f->map = cp;
1229: }
1.8 kristaps 1230:
1.15 kristaps 1231: f->insplice += sz;
1.21 kristaps 1232: memmove(f->map + pos + sz, f->map + pos, f->mapsz - pos);
1233: memcpy(f->map + pos, buf, sz);
1.14 kristaps 1234: f->mapsz += sz;
1.8 kristaps 1235: }
1236:
1237: /*
1.1 kristaps 1238: * Parse a block sequence until we have the "@end endtoken" command
1239: * invocation.
1240: * This will return immediately at EOF.
1241: */
1242: void
1.14 kristaps 1243: parseto(struct texi *p, size_t *pos, const char *endtoken)
1.1 kristaps 1244: {
1.17 kristaps 1245: size_t end, sv;
1.7 kristaps 1246: enum texicmd cmd;
1247: size_t endtoksz;
1248: struct teximacro *macro;
1.1 kristaps 1249:
1250: endtoksz = strlen(endtoken);
1251: assert(endtoksz > 0);
1.34 schwarze 1252:
1.14 kristaps 1253: while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
1254: switch (BUF(p)[*pos]) {
1.1 kristaps 1255: case ('}'):
1256: if (0 == p->ign)
1257: texiwarn(p, "unexpected \"}\"");
1.14 kristaps 1258: advance(p, pos);
1.1 kristaps 1259: continue;
1260: case ('{'):
1261: if (0 == p->ign)
1262: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1263: advance(p, pos);
1.1 kristaps 1264: continue;
1265: case ('@'):
1266: break;
1267: default:
1.14 kristaps 1268: parseword(p, pos, '\0');
1.1 kristaps 1269: continue;
1270: }
1271:
1.17 kristaps 1272: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1273: cmd = texicmd(p, *pos, &end, ¯o);
1274: advanceto(p, pos, end);
1.1 kristaps 1275: if (TEXICMD_END == cmd) {
1.14 kristaps 1276: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1277: advance(p, pos);
1.34 schwarze 1278: /*
1.1 kristaps 1279: * FIXME: check the full word, not just its
1280: * initial substring!
1281: */
1.14 kristaps 1282: if (BUFSZ(p) - *pos >= endtoksz && 0 == strncmp
1283: (&BUF(p)[*pos], endtoken, endtoksz)) {
1284: advanceeoln(p, pos, 0);
1.1 kristaps 1285: break;
1286: }
1287: if (0 == p->ign)
1288: texiwarn(p, "unexpected \"end\"");
1.14 kristaps 1289: advanceeoln(p, pos, 0);
1.1 kristaps 1290: continue;
1.34 schwarze 1291: }
1.7 kristaps 1292: if (NULL != macro)
1.17 kristaps 1293: texiexecmacro(p, macro, sv, pos);
1.34 schwarze 1294: if (TEXICMD__MAX == cmd)
1.7 kristaps 1295: continue;
1.34 schwarze 1296: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1297: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1298: }
1.30 kristaps 1299:
1.34 schwarze 1300: if (*pos == BUFSZ(p))
1.30 kristaps 1301: texiwarn(p, "EOF expecting \"%s\" end\n", endtoken);
1.1 kristaps 1302: }
1303:
1304: /*
1.12 kristaps 1305: * Like parsefile() but used for reading from stdandard input.
1306: * This can only be called for the first file!
1307: */
1308: void
1309: parsestdin(struct texi *p)
1310: {
1311: struct texifile *f;
1312: ssize_t ssz;
1313:
1314: assert(0 == p->filepos);
1315: f = &p->files[p->filepos];
1316: memset(f, 0, sizeof(struct texifile));
1317:
1318: f->type = TEXISRC_STDIN;
1319: f->name = "<stdin>";
1320:
1.14 kristaps 1321: for (f->mapsz = 0; ; f->mapsz += (size_t)ssz) {
1322: if (f->mapsz == f->mapmaxsz) {
1323: if (f->mapmaxsz == (1U << 31))
1.12 kristaps 1324: texierr(p, "stdin buffer too long");
1.34 schwarze 1325: f->mapmaxsz = f->mapmaxsz > 65536 / 2 ?
1.14 kristaps 1326: 2 * f->mapmaxsz : 65536;
1327: f->map = realloc(f->map, f->mapmaxsz);
1.34 schwarze 1328: if (NULL == f->map)
1.12 kristaps 1329: texiabort(p, NULL);
1330: }
1.34 schwarze 1331: ssz = read(STDIN_FILENO, f->map +
1.14 kristaps 1332: (int)f->mapsz, f->mapmaxsz - f->mapsz);
1.12 kristaps 1333: if (0 == ssz)
1334: break;
1.34 schwarze 1335: else if (-1 == ssz)
1.12 kristaps 1336: texiabort(p, NULL);
1337: }
1338:
1339: p->filepos++;
1.14 kristaps 1340: parseeof(p);
1.12 kristaps 1341: texifilepop(p);
1342: }
1343:
1344: /*
1.1 kristaps 1345: * Memory-map the file "fname" and begin parsing it unless "parse" is
1346: * zero, in which case we just dump the file to stdout (making sure it
1347: * doesn't trip up mdoc(7) along the way).
1348: * This can be called in a nested context.
1349: */
1350: void
1351: parsefile(struct texi *p, const char *fname, int parse)
1352: {
1353: struct texifile *f;
1354: int fd;
1355: struct stat st;
1356: size_t i;
1.14 kristaps 1357: char *map;
1.1 kristaps 1358:
1.34 schwarze 1359: if (64 == p->filepos)
1.6 kristaps 1360: texierr(p, "too many open files");
1.1 kristaps 1361: f = &p->files[p->filepos];
1362: memset(f, 0, sizeof(struct texifile));
1363:
1.12 kristaps 1364: f->type = TEXISRC_FILE;
1.1 kristaps 1365: f->name = fname;
1366: if (-1 == (fd = open(fname, O_RDONLY, 0))) {
1367: texiabort(p, fname);
1368: } else if (-1 == fstat(fd, &st)) {
1369: close(fd);
1370: texiabort(p, fname);
1.34 schwarze 1371: }
1.1 kristaps 1372:
1.14 kristaps 1373: f->mapsz = f->mapmaxsz = st.st_size;
1374: map = mmap(NULL, f->mapsz,
1.1 kristaps 1375: PROT_READ, MAP_SHARED, fd, 0);
1376: close(fd);
1377:
1.14 kristaps 1378: if (MAP_FAILED == map)
1.1 kristaps 1379: texiabort(p, fname);
1380:
1381: if ( ! parse) {
1.13 kristaps 1382: for (i = 0; i < f->mapsz; i++)
1.14 kristaps 1383: texiputchar(p, map[i]);
1.13 kristaps 1384: if (p->outcol)
1385: texiputchar(p, '\n');
1.14 kristaps 1386: munmap(map, f->mapsz);
1387: return;
1388: }
1389:
1390: p->filepos++;
1391: f->map = malloc(f->mapsz);
1392: memcpy(f->map, map, f->mapsz);
1393: munmap(map, f->mapsz);
1394: parseeof(p);
1.1 kristaps 1395: texifilepop(p);
1396: }
1397:
1.2 kristaps 1398: /*
1399: * Look up the value to a stored pair's value starting in "buf" from
1400: * start to end.
1401: * Return the pointer to the value memory, which can be NULL if the
1402: * pointer key does not exist.
1403: * The pointer can point to NULL if the value has been unset.
1404: */
1405: static char **
1.14 kristaps 1406: valuequery(const struct texi *p, size_t start, size_t end)
1.2 kristaps 1407: {
1408: size_t i, sz, len;
1409:
1410: assert(end >= start);
1411: /* Ignore zero-length. */
1412: if (0 == (len = (end - start)))
1413: return(NULL);
1414: for (i = 0; i < p->valsz; i++) {
1415: sz = strlen(p->vals[i].key);
1416: if (sz != len)
1417: continue;
1.14 kristaps 1418: if (0 == strncmp(p->vals[i].key, &BUF(p)[start], len))
1.2 kristaps 1419: return(&p->vals[i].value);
1420: }
1421: return(NULL);
1422: }
1423:
1424: /*
1425: * Parse a key until the end of line, e.g., @clear foo\n, and return the
1426: * pointer to its value via valuequery().
1427: */
1428: static char **
1.14 kristaps 1429: valuelquery(struct texi *p, size_t *pos)
1.2 kristaps 1430: {
1431: size_t start, end;
1432: char **ret;
1433:
1.14 kristaps 1434: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1435: advance(p, pos);
1436: if (*pos == BUFSZ(p))
1.2 kristaps 1437: return(NULL);
1.14 kristaps 1438: for (start = end = *pos; end < BUFSZ(p); end++)
1439: if ('\n' == BUF(p)[end])
1.2 kristaps 1440: break;
1.14 kristaps 1441: advanceto(p, pos, end);
1442: if (*pos < BUFSZ(p)) {
1443: assert('\n' == BUF(p)[*pos]);
1444: advance(p, pos);
1.2 kristaps 1445: }
1.14 kristaps 1446: if (NULL == (ret = valuequery(p, start, end)))
1.2 kristaps 1447: return(NULL);
1448: return(ret);
1449: }
1450:
/*
 * Unset the value whose key occupies the rest of the line: the stored
 * string is freed and its slot reset to NULL.
 * Unknown keys are silently ignored.
 */
void
valuelclear(struct texi *p, size_t *pos)
{
	char	**slot;

	slot = valuelquery(p, pos);
	if (slot != NULL) {
		free(*slot);
		*slot = NULL;
	}
}
1461:
/*
 * Return the value whose key occupies the rest of the line, or NULL
 * if the key is not stored or its value has been unset.
 */
const char *
valuellookup(struct texi *p, size_t *pos)
{
	char	**slot;

	slot = valuelquery(p, pos);
	return(slot == NULL ? NULL : *slot);
}
1471:
1472: /*
1473: * Parse a key from a bracketed string, e.g., @value{foo}, and return
1474: * the pointer to its value.
1475: * If the returned pointer is NULL, either there was no string within
1476: * the brackets (or no brackets), or the value was not found, or the
1477: * value had previously been unset.
1478: */
1479: const char *
1.14 kristaps 1480: valueblookup(struct texi *p, size_t *pos)
1.2 kristaps 1481: {
1482: size_t start, end;
1483: char **ret;
1484:
1.14 kristaps 1485: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1486: advance(p, pos);
1487: if (*pos == BUFSZ(p) || '{' != BUF(p)[*pos])
1.2 kristaps 1488: return(NULL);
1.14 kristaps 1489: advance(p, pos);
1490: for (start = end = *pos; end < BUFSZ(p); end++)
1491: if ('}' == BUF(p)[end])
1.2 kristaps 1492: break;
1.14 kristaps 1493: advanceto(p, pos, end);
1494: if (*pos < BUFSZ(p)) {
1495: assert('}' == BUF(p)[*pos]);
1496: advance(p, pos);
1.2 kristaps 1497: }
1.14 kristaps 1498: if (NULL == (ret = valuequery(p, start, end)))
1.2 kristaps 1499: return(NULL);
1500: return(*ret);
1501: }
1502:
1503: void
1504: valueadd(struct texi *p, char *key, char *val)
1505: {
1506: size_t i;
1507:
1508: assert(NULL != key);
1509: assert(NULL != val);
1510:
1511: for (i = 0; i < p->valsz; i++)
1512: if (0 == strcmp(p->vals[i].key, key))
1513: break;
1514:
1515: if (i < p->valsz) {
1516: free(key);
1517: free(p->vals[i].value);
1518: p->vals[i].value = val;
1519: } else {
1.4 kristaps 1520: /* FIXME: reallocarray() */
1.34 schwarze 1521: p->vals = realloc(p->vals,
1.2 kristaps 1522: (p->valsz + 1) *
1523: sizeof(struct texivalue));
1.4 kristaps 1524: if (NULL == p->vals)
1525: texiabort(p, NULL);
1.2 kristaps 1526: p->vals[p->valsz].key = key;
1527: p->vals[p->valsz].value = val;
1528: p->valsz++;
1529: }
1.7 kristaps 1530: }
1531:
1532: /*
1533: * Take the arguments to a macro, e.g., @foo{bar, baz, xyzzy} (or the
1534: * declaration form, @macro foo {arg1, ...}) and textually convert it to
1535: * an array of arguments of size "argsz".
1536: * These need to be freed individually and as a whole.
1537: * NOTE: this will puke on @, or @} macros, which can trick it into
1538: * stopping argument parsing earlier.
1539: * Ergo, textual: this doesn't interpret the arguments in any way.
1540: */
1541: char **
1.14 kristaps 1542: argparse(struct texi *p, size_t *pos, size_t *argsz, size_t hint)
1.7 kristaps 1543: {
1544: char **args;
1545: size_t start, end, stack;
1546:
1.14 kristaps 1547: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1548: advance(p, pos);
1.7 kristaps 1549:
1550: args = NULL;
1551: *argsz = 0;
1552:
1.17 kristaps 1553: if (*pos == BUFSZ(p))
1554: return(args);
1555:
1.14 kristaps 1556: if ('{' != BUF(p)[*pos] && hint) {
1.10 kristaps 1557: /*
1558: * Special case: if we encounter an unbracketed argument
1559: * and we're being invoked with non-zero arguments
1560: * (versus being set, i.e., hint>0), then parse until
1561: * the end of line.
1562: */
1563: *argsz = 1;
1564: args = calloc(1, sizeof(char *));
1565: if (NULL == args)
1566: texiabort(p, NULL);
1567: start = *pos;
1.14 kristaps 1568: while (*pos < BUFSZ(p)) {
1569: if ('\n' == BUF(p)[*pos])
1.10 kristaps 1570: break;
1.14 kristaps 1571: advance(p, pos);
1.10 kristaps 1572: }
1573: args[0] = malloc(*pos - start + 1);
1.14 kristaps 1574: memcpy(args[0], &BUF(p)[start], *pos - start);
1.10 kristaps 1575: args[0][*pos - start] = '\0';
1.14 kristaps 1576: if (*pos < BUFSZ(p) && '\n' == BUF(p)[*pos])
1577: advance(p, pos);
1.10 kristaps 1578: return(args);
1.14 kristaps 1579: } else if ('{' != BUF(p)[*pos])
1.7 kristaps 1580: return(args);
1.17 kristaps 1581:
1582: assert('{' == BUF(p)[*pos]);
1.7 kristaps 1583:
1584: /* Parse til the closing '}', putting into the array. */
1.14 kristaps 1585: advance(p, pos);
1586: while (*pos < BUFSZ(p)) {
1587: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1588: advance(p, pos);
1.7 kristaps 1589: start = *pos;
1590: stack = 0;
1.14 kristaps 1591: while (*pos < BUFSZ(p)) {
1.34 schwarze 1592: /*
1.7 kristaps 1593: * According to the manual, commas within
1594: * embedded commands are escaped.
1595: * We keep track of embedded-ness in the "stack"
1596: * state anyway, so this is free.
1597: */
1.14 kristaps 1598: if (',' == BUF(p)[*pos] && 0 == stack && 1 != hint)
1.7 kristaps 1599: break;
1.14 kristaps 1600: else if (0 == stack && '}' == BUF(p)[*pos])
1.7 kristaps 1601: break;
1.14 kristaps 1602: else if (0 != stack && '}' == BUF(p)[*pos])
1.7 kristaps 1603: stack--;
1.14 kristaps 1604: else if ('{' == BUF(p)[*pos])
1.7 kristaps 1605: stack++;
1.14 kristaps 1606: advance(p, pos);
1.7 kristaps 1607: }
1608: if (stack)
1609: texiwarn(p, "unterminated macro "
1610: "in macro arguments");
1.14 kristaps 1611: if ((end = *pos) == BUFSZ(p))
1.7 kristaps 1612: break;
1613: /* Test for zero-length '{ }'. */
1.14 kristaps 1614: if (start == end && '}' == BUF(p)[*pos] && 0 == *argsz)
1.7 kristaps 1615: break;
1616: /* FIXME: use reallocarray. */
1617: args = realloc
1618: (args, sizeof(char *) *
1619: (*argsz + 1));
1620: if (NULL == args)
1621: texiabort(p, NULL);
1622: args[*argsz] = malloc(end - start + 1);
1623: if (NULL == args[*argsz])
1624: texiabort(p, NULL);
1625: memcpy(args[*argsz],
1.14 kristaps 1626: &BUF(p)[start], end - start);
1.7 kristaps 1627: args[*argsz][end - start] = '\0';
1628: (*argsz)++;
1.14 kristaps 1629: if ('}' == BUF(p)[*pos])
1.7 kristaps 1630: break;
1.14 kristaps 1631: advance(p, pos);
1.7 kristaps 1632: }
1633:
1.34 schwarze 1634: if (*pos == BUFSZ(p))
1.7 kristaps 1635: texierr(p, "unterminated arguments");
1.14 kristaps 1636: assert('}' == BUF(p)[*pos]);
1637: advance(p, pos);
1.7 kristaps 1638: return(args);
1.2 kristaps 1639: }
1.20 kristaps 1640:
1641: /*
1642: * If we're printing chapters, then do some naviation here and then
1643: * close our outfile.
1644: * I want to call this the SEE ALSO section, but that's not really what
1645: * it is: we'll refer to the "initial" (top) node and the next and
1646: * previous chapters.
1647: */
1648: void
1649: teximdocclose(struct texi *p, int last)
1650: {
1651: char buf[PATH_MAX];
1652:
1.32 kristaps 1653: if (NULL == p->chapters || 1 == p->nodesz)
1.20 kristaps 1654: return;
1655:
1656: teximacro(p, "Sh INFO NAVIGATION");
1657:
1658: /* Print a reference to the "top" node. */
1.32 kristaps 1659: if (-1 != p->nodecache[p->nodecur].up) {
1.22 kristaps 1660: texiputchars(p, "Top node,");
1.34 schwarze 1661: snprintf(buf, sizeof(buf), "%s-%zd 7",
1.32 kristaps 1662: p->chapters, p->nodecache[p->nodecur].up);
1.31 kristaps 1663: p->seenvs = 0;
1.20 kristaps 1664: teximacroopen(p, "Xr ");
1665: texiputchars(p, buf);
1.22 kristaps 1666: texiputchars(p, " ;");
1.20 kristaps 1667: teximacroclose(p);
1668: }
1669:
1.32 kristaps 1670: if (-1 != p->nodecache[p->nodecur].prev) {
1.22 kristaps 1671: texiputchars(p, "previous node,");
1.34 schwarze 1672: snprintf(buf, sizeof(buf), "%s-%zd 7",
1.32 kristaps 1673: p->chapters, p->nodecache[p->nodecur].prev);
1.31 kristaps 1674: p->seenvs = 0;
1.20 kristaps 1675: teximacroopen(p, "Xr ");
1676: texiputchars(p, buf);
1.34 schwarze 1677: if ( ! last)
1.22 kristaps 1678: texiputchars(p, " ;");
1.20 kristaps 1679: teximacroclose(p);
1.34 schwarze 1680: }
1.20 kristaps 1681:
1.32 kristaps 1682: if (-1 != p->nodecache[p->nodecur].next) {
1683: texiputchars(p, "next node,");
1684: snprintf(buf, sizeof(buf), "%s-%zd 7",
1685: p->chapters, p->nodecache[p->nodecur].next);
1.31 kristaps 1686: p->seenvs = 0;
1.20 kristaps 1687: teximacroopen(p, "Xr ");
1688: texiputchars(p, buf);
1689: teximacroclose(p);
1690: }
1691:
1692: fclose(p->outfile);
1.32 kristaps 1693: p->outfile = NULL;
1694: }
1695:
1696: ssize_t
1697: texicache(struct texi *p, const char *buf, size_t sz)
1698: {
1699: size_t i;
1700:
1701: for (i = 0; i < p->nodecachesz; i++) {
1702: if (sz != strlen(p->nodecache[i].name))
1703: continue;
1704: if (strncmp(buf, p->nodecache[i].name, sz))
1705: continue;
1706: break;
1707: }
1708: if (i < p->nodecachesz)
1709: return(i);
1710: if (NULL == buf)
1711: return(-1);
1712: p->nodecache = realloc
1.34 schwarze 1713: (p->nodecache,
1.32 kristaps 1714: (p->nodecachesz + 1) * sizeof(struct texinode));
1715: if (NULL == p->nodecache)
1716: texiabort(p, NULL);
1717: p->nodecache[p->nodecachesz].name = malloc(sz + 1);
1718: if (NULL == p->nodecache[p->nodecachesz].name)
1719: texiabort(p, NULL);
1720: memcpy(p->nodecache[p->nodecachesz].name, buf, sz);
1721: p->nodecache[p->nodecachesz].name[sz] = '\0';
1722: p->nodecache[p->nodecachesz].up =
1723: p->nodecache[p->nodecachesz].next =
1724: p->nodecache[p->nodecachesz].prev = -1;
1725: p->nodecachesz++;
1726: return(p->nodecachesz - 1);
1.20 kristaps 1727: }
1728:
1729: /*
1.32 kristaps 1730: * Here we print our standard mdoc(7) prologue.
1731: * We use the title set with @settitle for the `Nd' description
1732: * and the source document filename (the first one as invoked on
1733: * the command line) for the title.
1.35 schwarze 1734: * The date is set to the modification time of the input.
1.20 kristaps 1735: */
1736: void
1.21 kristaps 1737: teximdocopen(struct texi *p, size_t *pos)
1.20 kristaps 1738: {
1739: const char *cp;
1740:
1.30 kristaps 1741: p->seenvs = -1;
1.20 kristaps 1742: teximacroopen(p, "Dd");
1.35 schwarze 1743: texiputchars(p, p->date);
1.20 kristaps 1744: teximacroclose(p);
1745: teximacroopen(p, "Dt");
1746: for (cp = p->title; '\0' != *cp; cp++)
1747: texiputchar(p, toupper((unsigned int)*cp));
1748: texiputchars(p, " 7");
1749: teximacroclose(p);
1750: teximacro(p, "Os");
1751: teximacro(p, "Sh NAME");
1752: teximacroopen(p, "Nm");
1753: for (cp = p->title; '\0' != *cp; cp++)
1754: texiputchar(p, *cp);
1755: teximacroclose(p);
1756: teximacroopen(p, "Nd");
1.21 kristaps 1757: /*
1758: * The subtitle `Nd' can consist of arbitrary macros, so paste
1759: * it and parse to the end of the line.
1760: */
1761: if (NULL != p->subtitle) {
1762: texisplice(p, p->subtitle, strlen(p->subtitle), *pos);
1763: parseeoln(p, pos);
1764: } else
1.20 kristaps 1765: texiputchars(p, "Unknown description");
1766: teximacroclose(p);
1767: }
1768:
CVSweb