Annotation of texi2mdoc/util.c, Revision 1.35
1.35 ! schwarze 1: /* $Id: util.c,v 1.34 2015/03/19 09:53:36 schwarze Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2015 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/mman.h>
18: #include <sys/stat.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <limits.h>
24: #include <stdarg.h>
25: #include <stdio.h>
26: #include <stdlib.h>
27: #include <string.h>
28: #include <unistd.h>
29:
30: #include "extern.h"
31:
32: /*
1.29 kristaps 33: * Table of macros.
34: * These ABSOLUTELY MUST BE 2 or three characters long.
35: */
static const char *const mdocs[] = {
	/* Every entry is two or three characters; NULL ends the table. */
	"Ap",  "Dd",  "Dt",  "Os",  "Sh",  "Ss",  "Pp",  "D1",
	"Dl",  "Bd",  "Ed",  "Bl",  "El",  "It",  "Ad",  "An",
	"Ar",  "Cd",  "Cm",  "Dv",  "Er",  "Ev",  "Ex",  "Fa",
	"Fd",  "Fl",  "Fn",  "Ft",  "Ic",  "In",  "Li",  "Nd",
	"Nm",  "Op",  "Ot",  "Pa",  "Rv",  "St",  "Va",  "Vt",
	"Xr",  "%A",  "%B",  "%D",  "%I",  "%J",  "%N",  "%O",
	"%P",  "%R",  "%T",  "%V",  "Ac",  "Ao",  "Aq",  "At",
	"Bc",  "Bf",  "Bo",  "Bq",  "Bsx", "Bx",  "Db",  "Dc",
	"Do",  "Dq",  "Ec",  "Ef",  "Em",  "Eo",  "Fx",  "Ms",
	"No",  "Ns",  "Nx",  "Ox",  "Pc",  "Pf",  "Po",  "Pq",
	"Qc",  "Ql",  "Qo",  "Qq",  "Re",  "Rs",  "Sc",  "So",
	"Sq",  "Sm",  "Sx",  "Sy",  "Tn",  "Ux",  "Xc",  "Xo",
	"Fo",  "Fc",  "Oo",  "Oc",  "Bk",  "Ek",  "Bt",  "Hf",
	"Fr",  "Ud",  "Lb",  "Lp",  "Lk",  "Mt",  "Brq", "Bro",
	"Brc", "%C",  "Es",  "En",  "Dx",  "%Q",  "br",  "sp",
	"%U",  "Ta",  "ll",
	NULL,
};
69:
70: /*
1.1 kristaps 71: * Unmap the top-most file in the stack of files currently opened (that
72: * is, nested calls to parsefile()).
73: */
74: void
75: texifilepop(struct texi *p)
76: {
77: struct texifile *f;
78:
79: assert(p->filepos > 0);
80: f = &p->files[--p->filepos];
1.14 kristaps 81: free(f->map);
1.1 kristaps 82: }
83:
1.7 kristaps 84: static void
85: teximacrofree(struct teximacro *p)
86: {
87: size_t i;
88:
89: for (i = 0; i < p->argsz; i++)
90: free(p->args[i]);
91:
92: free(p->args);
93: free(p->key);
94: free(p->value);
95: }
96:
97: static void
98: texivaluefree(struct texivalue *p)
99: {
100:
101: free(p->key);
102: free(p->value);
103: }
104:
1.30 kristaps 105: static void
106: texidex_free(struct texidex *p)
107: {
108: size_t i;
109:
110: for (i = 0; i < p->indexsz; i++)
1.31 kristaps 111: free(p->index[i].term);
1.30 kristaps 112:
113: free(p->index);
114: free(p->name);
115: p->index = NULL;
116: p->indexsz = 0;
117: }
118:
119: /*
120: * Add the text beginning at "index" and of "sz" bytes to the index
121: * named "tok" with name size "toksz".
122: * This will also output the necessary mdoc(7) to construct the index.
123: */
124: void
125: texindex(struct texi *p, const char *tok,
126: size_t toksz, const char *index, size_t sz)
127: {
1.31 kristaps 128: size_t i, isz;
1.30 kristaps 129: #ifdef HAVE_INDEX
130: char *cp;
131: #endif
132:
133: if (0 == sz) {
134: texiwarn(p, "zero-length index entry");
135: return;
136: }
137:
138: /* Look for the index. (Must be found.) */
139: for (i = 0; i < p->indexsz; i++) {
140: if (strlen(p->indexs[i].name) != toksz)
141: continue;
142: if (strncmp(p->indexs[i].name, tok, toksz))
143: continue;
144: break;
145: }
146:
147: assert(i < p->indexsz);
1.31 kristaps 148: isz = p->indexs[i].indexsz;
1.30 kristaps 149: /* Reallocate index's terms. */
150: p->indexs[i].index = realloc
151: (p->indexs[i].index,
1.31 kristaps 152: (isz + 1) * sizeof(struct texiterm));
1.30 kristaps 153: if (NULL == p->indexs[i].index)
154: texiabort(p, NULL);
155:
156: /* Add term to term array. */
1.32 kristaps 157: p->indexs[i].index[isz].chapter = p->nodecur;
1.31 kristaps 158: p->indexs[i].index[isz].term = malloc(sz + 1);
159: if (NULL == p->indexs[i].index[isz].term)
1.30 kristaps 160: texiabort(p, NULL);
1.31 kristaps 161: memcpy(p->indexs[i].index[isz].term, index, sz);
162: p->indexs[i].index[isz].term[sz] = '\0';
1.34 schwarze 163:
1.30 kristaps 164: /* Output mdoc(7) for index. */
165: #ifdef HAVE_INDEX
166: p->seenvs = -1;
167: teximacroopen(p, "Ix");
168: texiputchars(p, "idx");
169: texiputchars(p, p->indexs[i].name);
1.31 kristaps 170: cp = p->indexs[i].index[isz].term;
1.32 kristaps 171: while ('\n' != *cp)
1.30 kristaps 172: texiputchar(p, *cp++);
173: teximacroclose(p);
174: #endif
175: p->indexs[i].indexsz++;
176: }
177:
178: /*
179: * Add an index entry named "tok" of length "sz".
180: * This usually consists of two letters, e.g., "cp" or "vr".
181: * This does nothing if the index exists or is zero-sized.
182: */
183: void
184: texindex_add(struct texi *p, const char *tok, size_t sz)
185: {
186: size_t i;
187: char *cp;
188:
189: if (0 == sz)
190: return;
191:
192: /* Make sure we don't have a duplicate. */
193: for (i = 0; i < p->indexsz; i++) {
194: if (strlen(p->indexs[i].name) != sz)
195: continue;
196: if (strncmp(p->indexs[i].name, tok, sz))
197: continue;
198: return;
199: }
200:
201: /* Reallocate indices. */
1.34 schwarze 202: p->indexs = realloc(p->indexs,
203: sizeof(struct texidex) *
1.31 kristaps 204: (p->indexsz + 1));
1.30 kristaps 205: if (NULL == p->indexs)
206: texiabort(p, NULL);
207: if (NULL == (cp = malloc(sz + 1)))
208: texiabort(p, NULL);
209: memcpy(cp, tok, sz);
210: cp[sz] = '\0';
211: p->indexs[p->indexsz].name = cp;
212: p->indexs[p->indexsz].index = NULL;
213: p->indexs[p->indexsz].indexsz = 0;
214: p->indexsz++;
215: }
216:
1.1 kristaps 217: /*
218: * Unmap all files that we're currently using and free all resources
219: * that we've allocated during the parse.
220: * The utility should exit(...) after this is called.
221: */
222: void
223: texiexit(struct texi *p)
224: {
225: size_t i;
226:
227: /* Make sure we're newline-terminated. */
228: if (p->outcol)
1.20 kristaps 229: fputc('\n', p->outfile);
230: if (NULL != p->chapters)
231: teximdocclose(p, 1);
1.1 kristaps 232:
233: /* Unmap all files. */
234: while (p->filepos > 0)
235: texifilepop(p);
236:
1.7 kristaps 237: for (i = 0; i < p->macrosz; i++)
238: teximacrofree(&p->macros[i]);
1.1 kristaps 239: for (i = 0; i < p->dirsz; i++)
240: free(p->dirs[i]);
1.4 kristaps 241: for (i = 0; i < p->indexsz; i++)
1.30 kristaps 242: texidex_free(&p->indexs[i]);
1.34 schwarze 243: for (i = 0; i < p->valsz; i++)
1.7 kristaps 244: texivaluefree(&p->vals[i]);
1.4 kristaps 245:
1.32 kristaps 246: free(p->nodecache);
1.7 kristaps 247: free(p->macros);
1.1 kristaps 248: free(p->vals);
1.4 kristaps 249: free(p->indexs);
1.1 kristaps 250: free(p->dirs);
251: free(p->subtitle);
252: free(p->title);
1.26 kristaps 253: free(p->copying);
1.1 kristaps 254: }
255:
256: /*
257: * Fatal error: unmap all files and exit.
258: * The "errstring" is passed to perror(3).
259: */
void
texiabort(struct texi *p, const char *errstring)
{
	/* Report first: perror(3) reads errno, so do it before cleanup. */
	perror(errstring);

	/* Release parse state, then terminate with a failure status. */
	texiexit(p);
	exit(EXIT_FAILURE);
}
268:
269: /*
270: * Print a generic warning message (to stderr) tied to our current
271: * location in the parse sequence.
272: */
273: void
274: texiwarn(const struct texi *p, const char *fmt, ...)
275: {
1.34 schwarze 276: va_list ap;
1.15 kristaps 277: const struct texifile *f;
278:
279: f = &p->files[p->filepos - 1];
280:
281: if (f->insplice)
282: fprintf(stderr, "%s:%zu:%zu (%zuB left in splice): "
1.34 schwarze 283: "warning: ", f->name, f->line + 1,
1.15 kristaps 284: f->col + 1, f->insplice);
285: else
286: fprintf(stderr, "%s:%zu:%zu: warning: ",
287: f->name, f->line + 1, f->col + 1);
1.1 kristaps 288:
289: va_start(ap, fmt);
290: vfprintf(stderr, fmt, ap);
291: va_end(ap);
292: fputc('\n', stderr);
293: }
294:
295: /*
296: * Print an error message (to stderr) tied to our current location in
297: * the parse sequence, invoke texiexit(), then die.
298: */
299: void
300: texierr(struct texi *p, const char *fmt, ...)
301: {
1.34 schwarze 302: va_list ap;
1.15 kristaps 303: struct texifile *f;
304:
305: f = &p->files[p->filepos - 1];
306:
1.34 schwarze 307: if (f->insplice)
1.15 kristaps 308: fprintf(stderr, "%s:%zu:%zu: (%zuB left in splice): "
1.34 schwarze 309: "error: ", f->name, f->line + 1,
1.15 kristaps 310: f->col + 1, f->insplice);
311: else
312: fprintf(stderr, "%s:%zu:%zu: error: ",
313: f->name, f->line + 1, f->col + 1);
1.1 kristaps 314:
315: va_start(ap, fmt);
316: vfprintf(stderr, fmt, ap);
317: va_end(ap);
318: fputc('\n', stderr);
319: texiexit(p);
320: exit(EXIT_FAILURE);
321: }
322:
323: /*
324: * Put a single data character to the output if we're not ignoring.
1.13 kristaps 325: * Escape starting a line with a control character and slashes.
1.1 kristaps 326: */
327: void
328: texiputchar(struct texi *p, char c)
329: {
330:
331: if (p->ign)
332: return;
333: if ('.' == c && 0 == p->outcol)
1.20 kristaps 334: fputs("\\&", p->outfile);
1.10 kristaps 335: if ('\'' == c && 0 == p->outcol)
1.20 kristaps 336: fputs("\\&", p->outfile);
1.1 kristaps 337:
1.23 kristaps 338: if (p->uppercase)
339: fputc(toupper((unsigned int)c), p->outfile);
340: else
341: fputc(c, p->outfile);
1.13 kristaps 342: if ('\\' == c)
1.20 kristaps 343: fputc('e', p->outfile);
1.1 kristaps 344: if ('\n' == c) {
345: p->outcol = 0;
346: p->seenws = 0;
347: } else
348: p->outcol++;
349: }
350:
351: /*
1.13 kristaps 352: * Put an opaque series of characters.
353: * Characters starting a line with a control character are escaped, but
354: * that's it, so don't use this for non-controlled sequences of text.
1.1 kristaps 355: */
356: void
357: texiputchars(struct texi *p, const char *s)
358: {
359:
1.13 kristaps 360: if (p->ign)
361: return;
362: if ('.' == *s && 0 == p->outcol)
1.20 kristaps 363: fputs("\\&", p->outfile);
1.13 kristaps 364: if ('\'' == *s && 0 == p->outcol)
1.20 kristaps 365: fputs("\\&", p->outfile);
1.34 schwarze 366: if (p->uppercase)
1.23 kristaps 367: for ( ; '\0' != *s; s++)
368: p->outcol += fputc(toupper
369: ((unsigned int)*s), p->outfile);
370: else
371: p->outcol += fputs(s, p->outfile);
1.9 kristaps 372: }
373:
374: /*
375: * This puts all characters onto the output stream but makes sure to
376: * escape mdoc(7) slashes.
1.14 kristaps 377: * FIXME: useless.
1.9 kristaps 378: */
379: void
1.14 kristaps 380: texiputbuf(struct texi *p, size_t start, size_t end)
1.9 kristaps 381: {
382:
1.14 kristaps 383: for ( ; start < end; start++)
384: texiputchar(p, BUF(p)[start]);
1.1 kristaps 385: }
386:
387: /*
388: * Close an mdoc(7) macro opened with teximacroopen().
389: * If there are no more macros on the line, prints a newline.
390: */
391: void
392: teximacroclose(struct texi *p)
393: {
394:
1.30 kristaps 395: if (p->ign || p->literal|| TEXILIST_TABLE == p->list)
1.1 kristaps 396: return;
397:
398: if (0 == --p->outmacro) {
1.20 kristaps 399: fputc('\n', p->outfile);
1.1 kristaps 400: p->outcol = p->seenws = 0;
401: }
402: }
403:
404: /*
405: * Open a mdoc(7) macro.
406: * This is used for line macros, e.g., Qq [foo bar baz].
407: * It can be invoked for nested macros, e.g., Qq Li foo .
408: * TODO: flush-right punctuation (e.g., parenthesis).
409: */
410: void
411: teximacroopen(struct texi *p, const char *s)
412: {
413: int rc;
414:
1.30 kristaps 415: if (p->ign || p->literal|| TEXILIST_TABLE == p->list)
1.1 kristaps 416: return;
417:
418: if (p->outcol && 0 == p->outmacro) {
1.20 kristaps 419: fputc('\n', p->outfile);
1.1 kristaps 420: p->outcol = 0;
421: }
422:
1.30 kristaps 423: if (p->seenvs > 0 && 0 == p->outmacro)
424: fputs(".Pp\n", p->outfile);
425:
1.1 kristaps 426: if (0 == p->outmacro)
1.20 kristaps 427: fputc('.', p->outfile);
1.1 kristaps 428: else
1.20 kristaps 429: fputc(' ', p->outfile);
1.1 kristaps 430:
1.20 kristaps 431: if (EOF != (rc = fputs(s, p->outfile)))
1.1 kristaps 432: p->outcol += rc;
433:
1.20 kristaps 434: fputc(' ', p->outfile);
1.1 kristaps 435: p->outcol++;
436: p->outmacro++;
1.30 kristaps 437: p->seenws = p->seenvs = 0;
1.1 kristaps 438: }
439:
440: /*
441: * Put a standalone mdoc(7) command with the trailing newline.
442: */
443: void
444: teximacro(struct texi *p, const char *s)
445: {
446:
447: if (p->ign)
448: return;
449:
450: if (p->outmacro)
451: texierr(p, "\"%s\" in open line scope!?", s);
452: if (p->literal)
453: texierr(p, "\"%s\" in a literal scope!?", s);
454: if (p->outcol)
1.20 kristaps 455: fputc('\n', p->outfile);
1.30 kristaps 456: if (p->seenvs > 0)
457: fputs(".Pp\n", p->outfile);
1.1 kristaps 458:
1.20 kristaps 459: fputc('.', p->outfile);
460: fputs(s, p->outfile);
461: fputc('\n', p->outfile);
1.1 kristaps 462: p->outcol = p->seenws = 0;
463: }
464:
465: /*
466: * Introduce vertical space during normal (non-macro) input.
467: */
468: void
469: texivspace(struct texi *p)
470: {
471:
1.30 kristaps 472: if (TEXILIST_TABLE != p->list && p->seenvs >= 0)
473: p->seenvs = 1;
1.1 kristaps 474: }
475:
476: /*
477: * Advance by a single byte in the input stream, adjusting our location
478: * in the current input file.
479: */
480: void
1.14 kristaps 481: advance(struct texi *p, size_t *pos)
1.1 kristaps 482: {
1.15 kristaps 483: struct texifile *f;
1.1 kristaps 484:
1.15 kristaps 485: f = &p->files[p->filepos - 1];
486:
487: if (0 == f->insplice) {
488: if ('\n' == BUF(p)[*pos]) {
489: f->line++;
490: f->col = 0;
491: } else
492: f->col++;
1.17 kristaps 493: } else {
1.15 kristaps 494: --f->insplice;
1.17 kristaps 495: if (0 == f->insplice)
496: f->depth = 0;
497: }
1.1 kristaps 498:
499: (*pos)++;
500: }
501:
502: /*
503: * It's common to want punctuation to float on the right side of macro
504: * lines in mdoc(7), e.g., ".Em hello ) ."
505: * This function does so, and should be called before teximacroclose().
506: * It will detect that it's the last in the nested macros and
507: * appropriately flush-left punctuation alongside the macro.
508: */
509: void
1.14 kristaps 510: texipunctuate(struct texi *p, size_t *pos)
1.1 kristaps 511: {
512: size_t start, end;
513:
514: if (1 != p->outmacro)
515: return;
516:
1.14 kristaps 517: for (start = end = *pos; end < BUFSZ(p); end++) {
518: switch (BUF(p)[end]) {
1.1 kristaps 519: case (','):
520: case (')'):
521: case ('.'):
522: case ('"'):
523: case (':'):
1.22 kristaps 524: case (';'):
1.1 kristaps 525: case ('!'):
526: case ('?'):
527: continue;
528: default:
529: break;
530: }
531: break;
532: }
533: if (end == *pos)
534: return;
1.34 schwarze 535: if (end + 1 == BUFSZ(p) || ' ' == BUF(p)[end] ||
1.30 kristaps 536: '@' == BUF(p)[end] || '\n' == BUF(p)[end]) {
1.1 kristaps 537: for ( ; start < end; start++) {
538: texiputchar(p, ' ');
1.14 kristaps 539: texiputchar(p, BUF(p)[start]);
540: advance(p, pos);
1.1 kristaps 541: }
542: }
543: }
544:
545: /*
546: * Advance to the next non-whitespace word in the input stream.
547: * If we're in literal mode, then print all of the whitespace as we're
548: * doing so.
549: */
550: static size_t
1.14 kristaps 551: advancenext(struct texi *p, size_t *pos)
1.1 kristaps 552: {
1.34 schwarze 553:
1.1 kristaps 554: if (p->literal) {
1.14 kristaps 555: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos])) {
556: texiputchar(p, BUF(p)[*pos]);
557: advance(p, pos);
1.1 kristaps 558: }
559: return(*pos);
1.34 schwarze 560: }
1.1 kristaps 561:
1.14 kristaps 562: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos])) {
1.1 kristaps 563: p->seenws = 1;
1.30 kristaps 564: if (0 == p->seenvs && '\n' == BUF(p)[*pos])
565: if (*pos + 1 < BUFSZ(p) && '\n' == BUF(p)[*pos + 1])
566: p->seenvs = 1;
1.14 kristaps 567: advance(p, pos);
1.1 kristaps 568: }
569: return(*pos);
570: }
571:
572: /*
573: * Advance to the EOLN in the input stream.
1.22 kristaps 574: * This will skip over '@' markers in an effort to ignore escaped
575: * newlines.
1.1 kristaps 576: */
577: size_t
1.14 kristaps 578: advanceeoln(struct texi *p, size_t *pos, int consumenl)
1.1 kristaps 579: {
580:
1.22 kristaps 581: while (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos]) {
582: if ('@' == BUF(p)[*pos])
583: advance(p, pos);
1.33 kristaps 584: if (*pos < BUFSZ(p))
585: advance(p, pos);
1.22 kristaps 586: }
1.14 kristaps 587: if (*pos < BUFSZ(p) && consumenl)
588: advance(p, pos);
1.1 kristaps 589: return(*pos);
590: }
591:
592: /*
593: * Advance to position "end", which is an absolute position in the
594: * current buffer greater than or equal to the current position.
595: */
void
advanceto(struct texi *p, size_t *pos, size_t end)
{

	/* Moving backwards is a programming error. */
	assert(*pos <= end);

	/* Step byte by byte so advance() sees every position. */
	for ( ; *pos < end; advance(p, pos))
		continue;
}
604:
1.7 kristaps 605: static void
1.17 kristaps 606: texiexecmacro(struct texi *p, struct teximacro *m, size_t sv, size_t *pos)
1.7 kristaps 607: {
1.34 schwarze 608: size_t valsz, realsz, aasz, asz,
1.11 kristaps 609: ssz, i, j, k, start, end;
610: char *val;
611: char **args;
612: const char *cp;
1.7 kristaps 613:
1.17 kristaps 614: /* Disregard empty macros. */
1.22 kristaps 615: if (0 == (valsz = realsz = strlen(m->value))) {
616: args = argparse(p, pos, &asz, m->argsz);
617: for (i = 0; i < asz; i++)
618: free(args[i]);
619: free(args);
1.17 kristaps 620: return;
1.22 kristaps 621: }
1.17 kristaps 622:
623: /*
624: * This is important: it protect us from macros that invoke more
625: * macros, possibly going on infinitely.
626: * We use "sv" instead of the current position because we might
627: * be invoked at the end of the macro (i.e., insplice == 0).
628: * The "sv" value was initialised at the start of the macro.
629: */
630: if (sv > 0)
1.24 kristaps 631: if (++p->files[p->filepos - 1].depth > 64)
1.17 kristaps 632: texierr(p, "maximium recursive depth");
633:
1.14 kristaps 634: args = argparse(p, pos, &asz, m->argsz);
1.7 kristaps 635: if (asz != m->argsz)
636: texiwarn(p, "invalid macro argument length");
637: aasz = asz < m->argsz ? asz : m->argsz;
638:
639: if (0 == aasz) {
1.21 kristaps 640: texisplice(p, m->value, valsz, *pos);
1.7 kristaps 641: return;
642: }
643:
644: val = strdup(m->value);
645:
646: for (i = j = 0; i < realsz; i++) {
647: /* Parse blindly til the backslash delimiter. */
648: if ('\\' != m->value[i]) {
649: val[j++] = m->value[i];
650: val[j] = '\0';
651: continue;
652: } else if (i == realsz - 1)
653: texierr(p, "trailing argument name delimiter");
654:
655: /* Double-backslash is escaped. */
656: if ('\\' == m->value[i + 1]) {
657: val[j++] = m->value[i++];
658: val[j] = '\0';
659: continue;
660: }
661:
662: assert('\\' == m->value[i] && i < realsz - 1);
663:
664: /* Parse to terminating delimiter. */
665: /* FIXME: embedded, escaped delimiters? */
1.34 schwarze 666: for (start = end = i + 1; end < realsz; end++)
1.7 kristaps 667: if ('\\' == m->value[end])
668: break;
669: if (end == realsz)
670: texierr(p, "unterminated argument name");
671:
672: for (k = 0; k < aasz; k++) {
673: if ((ssz = strlen(m->args[k])) != (end - start))
674: continue;
675: if (strncmp(&m->value[start], m->args[k], ssz))
676: continue;
677: break;
678: }
679:
1.34 schwarze 680: /*
681: * Argument didn't exist in argument table.
1.14 kristaps 682: * Just ignore it.
1.7 kristaps 683: */
684: if (k == aasz) {
1.14 kristaps 685: i = end;
1.7 kristaps 686: continue;
687: }
688:
689: if (strlen(args[k]) > ssz) {
690: valsz += strlen(args[k]);
691: val = realloc(val, valsz + 1);
692: if (NULL == val)
693: texiabort(p, NULL);
694: }
695:
1.34 schwarze 696: for (cp = args[k]; '\0' != *cp; cp++)
1.11 kristaps 697: val[j++] = *cp;
698:
699: val[j] = '\0';
1.7 kristaps 700: i = end;
701: }
702:
1.21 kristaps 703: texisplice(p, val, strlen(val), *pos);
1.7 kristaps 704:
705: for (i = 0; i < asz; i++)
706: free(args[i]);
707: free(args);
708: free(val);
1.34 schwarze 709: }
1.7 kristaps 710:
1.1 kristaps 711: /*
712: * Output a free-form word in the input stream, progressing to the next
713: * command or white-space.
714: * This also will advance the input stream.
715: */
716: static void
1.14 kristaps 717: parseword(struct texi *p, size_t *pos, char extra)
1.1 kristaps 718: {
1.34 schwarze 719: size_t i, end, len;
720: int c;
1.1 kristaps 721:
1.25 kristaps 722: /*
1.27 kristaps 723: * If a prior word had a terminating double-newline, then begin
724: * this text block with a `Pp'.
725: * We don't do this if we're in a literal context (we'll print
726: * out the newlines themselves) nor in a `TS' table.
727: */
1.30 kristaps 728: if (p->seenvs > 0 && 0 == p->literal && TEXILIST_TABLE != p->list) {
729: if (p->outcol > 0)
730: fputc('\n', p->outfile);
731: fputs(".Pp\n", p->outfile);
732: p->outcol = 0;
733: }
1.27 kristaps 734:
735: /*
1.25 kristaps 736: * Some line control: if we (non-macro, non-literal) already
737: * have more than 72 characters written to the screen, then
738: * output a newline before getting started.
739: */
1.34 schwarze 740: if (p->seenws && 0 == p->outmacro &&
1.1 kristaps 741: p->outcol > 72 && 0 == p->literal)
742: texiputchar(p, '\n');
1.25 kristaps 743:
744: /* Usual padding in the case of seen whitespace. */
1.1 kristaps 745: if (p->seenws && p->outcol && 0 == p->literal)
746: texiputchar(p, ' ');
747:
748: p->seenws = 0;
1.29 kristaps 749:
750: /*
751: * If we're in a macro line, we might want to print text that
752: * happens to be the same as an mdoc(7) macro.
753: * Obviously, we need to escape these words.
754: */
755: if (p->outmacro) {
756: end = *pos;
757: /* Read ahead to get the word length. */
758: while (end < BUFSZ(p) && ! ismspace(BUF(p)[end])) {
759: switch ((c = BUF(p)[end])) {
760: case ('@'):
761: case ('}'):
762: case ('{'):
763: break;
764: default:
765: if ('\0' != extra && extra == c)
766: break;
767: end++;
768: continue;
769: }
770: break;
771: }
772: len = end - *pos;
773: /* See if we have a match. */
774: for (i = 0; NULL != mdocs[i]; i++) {
775: /* All macros are 2 or three letters. */
776: if (len < 2 || len > 3)
777: continue;
778: /* Check the macro word length. */
779: if ('\0' == mdocs[i][2] && 2 != len)
780: continue;
781: else if ('\0' == mdocs[i][3] && 3 != len)
782: continue;
783: if (strncmp(mdocs[i], &BUF(p)[*pos], len))
784: continue;
785: texiputchars(p, "\\&");
786: break;
787: }
788: }
1.1 kristaps 789:
1.14 kristaps 790: while (*pos < BUFSZ(p) && ! ismspace(BUF(p)[*pos])) {
791: switch (BUF(p)[*pos]) {
1.1 kristaps 792: case ('@'):
793: case ('}'):
794: case ('{'):
795: return;
796: }
1.14 kristaps 797: if ('\0' != extra && BUF(p)[*pos] == extra)
1.1 kristaps 798: return;
1.28 kristaps 799:
800: if (p->literal) {
801: texiputchar(p, BUF(p)[*pos]);
802: advance(p, pos);
803: continue;
804: }
805:
1.30 kristaps 806: if ('"' == BUF(p)[*pos]) {
807: texiputchars(p, "\\(dq");
1.34 schwarze 808: } else if (*pos < BUFSZ(p) - 2 &&
809: '-' == BUF(p)[*pos] &&
810: '-' == BUF(p)[*pos + 1] &&
1.28 kristaps 811: '-' == BUF(p)[*pos + 2]) {
812: texiputchars(p, "\\(em");
813: advance(p, pos);
814: advance(p, pos);
1.34 schwarze 815: } else if (*pos < BUFSZ(p) - 1 &&
816: '-' == BUF(p)[*pos] &&
1.28 kristaps 817: '-' == BUF(p)[*pos + 1]) {
818: texiputchars(p, "\\(en");
819: advance(p, pos);
1.34 schwarze 820: } else if (*pos < BUFSZ(p) - 1 &&
821: '`' == BUF(p)[*pos] &&
1.14 kristaps 822: '`' == BUF(p)[*pos + 1]) {
1.1 kristaps 823: texiputchars(p, "\\(lq");
1.14 kristaps 824: advance(p, pos);
1.34 schwarze 825: } else if (*pos < BUFSZ(p) - 1 &&
826: '\'' == BUF(p)[*pos] &&
1.14 kristaps 827: '\'' == BUF(p)[*pos + 1]) {
1.1 kristaps 828: texiputchars(p, "\\(rq");
1.14 kristaps 829: advance(p, pos);
1.1 kristaps 830: } else
1.14 kristaps 831: texiputchar(p, BUF(p)[*pos]);
1.28 kristaps 832:
1.14 kristaps 833: advance(p, pos);
1.1 kristaps 834: }
1.25 kristaps 835:
1.34 schwarze 836: /*
1.25 kristaps 837: * New sentence, new line:if we (non-macro, non-literal) see a
838: * period at the end of the last printed word, then open a
839: * newline.
840: */
1.30 kristaps 841: if (0 == p->literal && 0 == p->outmacro && *pos < BUFSZ(p))
842: switch (BUF(p)[*pos - 1]) {
843: case ('.'):
844: case ('!'):
845: case ('?'):
846: texiputchar(p, '\n');
847: break;
848: default:
849: break;
850: }
851:
852: p->seenvs = 0;
1.1 kristaps 853: }
854:
855: /*
856: * Look up the command at position "pos" in the buffer, returning it (or
857: * TEXICMD__MAX if none found) and setting "end" to be the absolute
858: * index after the command name.
859: */
860: enum texicmd
1.19 kristaps 861: texicmd(const struct texi *p, size_t pos, size_t *end, struct teximacro **macro)
1.1 kristaps 862: {
1.4 kristaps 863: size_t i, len, toksz;
1.1 kristaps 864:
1.14 kristaps 865: assert('@' == BUF(p)[pos]);
1.1 kristaps 866:
1.7 kristaps 867: if (NULL != macro)
868: *macro = NULL;
869:
1.14 kristaps 870: if ((*end = pos) == BUFSZ(p))
1.1 kristaps 871: return(TEXICMD__MAX);
1.14 kristaps 872: else if ((*end = ++pos) == BUFSZ(p))
1.1 kristaps 873: return(TEXICMD__MAX);
874:
875: /* Alphabetic commands are special. */
1.23 kristaps 876: if ( ! isalpha((unsigned int)BUF(p)[pos])) {
1.14 kristaps 877: if ((*end = pos + 1) == BUFSZ(p))
1.1 kristaps 878: return(TEXICMD__MAX);
879: for (i = 0; i < TEXICMD__MAX; i++) {
880: if (1 != texitoks[i].len)
881: continue;
1.14 kristaps 882: if (0 == strncmp(texitoks[i].tok, &BUF(p)[pos], 1))
1.1 kristaps 883: return(i);
884: }
1.14 kristaps 885: texiwarn(p, "bad command: @%c", BUF(p)[pos]);
1.1 kristaps 886: return(TEXICMD__MAX);
887: }
888:
1.4 kristaps 889: /* Scan to the end of the possible command name. */
1.14 kristaps 890: for (*end = pos; *end < BUFSZ(p) && ! ismspace(BUF(p)[*end]); (*end)++)
1.34 schwarze 891: if ((*end > pos && ('@' == BUF(p)[*end] ||
1.14 kristaps 892: '{' == BUF(p)[*end] || '}' == BUF(p)[*end])))
1.1 kristaps 893: break;
894:
1.4 kristaps 895: /* Look for the command. */
1.1 kristaps 896: len = *end - pos;
897: for (i = 0; i < TEXICMD__MAX; i++) {
898: if (len != texitoks[i].len)
899: continue;
1.14 kristaps 900: if (0 == strncmp(texitoks[i].tok, &BUF(p)[pos], len))
1.1 kristaps 901: return(i);
902: }
903:
1.4 kristaps 904: /* Look for it in our indices. */
905: for (i = 0; i < p->indexsz; i++) {
1.30 kristaps 906: toksz = strlen(p->indexs[i].name);
1.4 kristaps 907: if (len != 5 + toksz)
908: continue;
1.30 kristaps 909: if (strncmp(&BUF(p)[pos], p->indexs[i].name, toksz))
1.4 kristaps 910: continue;
1.14 kristaps 911: if (0 == strncmp(&BUF(p)[pos + toksz], "index", 5))
1.7 kristaps 912: return(TEXICMD_USER_INDEX);
913: }
914:
915: for (i = 0; i < p->macrosz; i++) {
916: if (len != strlen(p->macros[i].key))
917: continue;
1.14 kristaps 918: if (strncmp(&BUF(p)[pos], p->macros[i].key, len))
1.7 kristaps 919: continue;
920: if (NULL != macro)
921: *macro = &p->macros[i];
922: return(TEXICMD__MAX);
1.4 kristaps 923: }
924:
1.14 kristaps 925: texiwarn(p, "bad command: @%.*s", (int)len, &BUF(p)[pos]);
1.1 kristaps 926: return(TEXICMD__MAX);
927: }
928:
929: /*
930: * Parse an argument from a bracketed command, e.g., @url{foo, baz}.
931: * Num should be set to the argument we're currently parsing, although
932: * it suffices for it to be zero or non-zero.
933: * This will return 1 if there are more arguments, 0 otherwise.
934: * This will stop (returning 0) in the event of EOF or if we're not at a
935: * bracket for the zeroth parse.
936: */
937: int
1.14 kristaps 938: parsearg(struct texi *p, size_t *pos, size_t num)
1.1 kristaps 939: {
1.17 kristaps 940: size_t end, sv;
1.7 kristaps 941: enum texicmd cmd;
942: struct teximacro *macro;
1.1 kristaps 943:
1.14 kristaps 944: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos]))
945: advance(p, pos);
946: if (*pos == BUFSZ(p) || (0 == num && '{' != BUF(p)[*pos]))
1.1 kristaps 947: return(0);
948: if (0 == num)
1.14 kristaps 949: advance(p, pos);
1.1 kristaps 950:
1.14 kristaps 951: while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
952: switch (BUF(p)[*pos]) {
1.1 kristaps 953: case (','):
1.14 kristaps 954: advance(p, pos);
1.1 kristaps 955: return(1);
956: case ('}'):
1.14 kristaps 957: advance(p, pos);
1.1 kristaps 958: return(0);
959: case ('{'):
960: if (0 == p->ign)
961: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 962: advance(p, pos);
1.1 kristaps 963: continue;
964: case ('@'):
965: break;
966: default:
1.14 kristaps 967: parseword(p, pos, ',');
1.1 kristaps 968: continue;
969: }
970:
1.17 kristaps 971: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 972: cmd = texicmd(p, *pos, &end, ¯o);
973: advanceto(p, pos, end);
1.7 kristaps 974: if (NULL != macro)
1.17 kristaps 975: texiexecmacro(p, macro, sv, pos);
1.34 schwarze 976: if (TEXICMD__MAX == cmd)
1.1 kristaps 977: continue;
978: if (NULL != texitoks[cmd].fp)
1.14 kristaps 979: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 980: }
981: return(0);
982: }
983:
984: /*
985: * Parse until the end of a bracketed statement, e.g., @foo{bar baz}.
986: * This will stop in the event of EOF or if we're not at a bracket.
987: */
988: void
1.18 kristaps 989: parsebracket(struct texi *p, size_t *pos, int dostack)
1.1 kristaps 990: {
1.18 kristaps 991: size_t end, sv, stack;
1.7 kristaps 992: enum texicmd cmd;
993: struct teximacro *macro;
1.1 kristaps 994:
1.14 kristaps 995: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos]))
996: advance(p, pos);
1.1 kristaps 997:
1.14 kristaps 998: if (*pos == BUFSZ(p) || '{' != BUF(p)[*pos])
1.1 kristaps 999: return;
1.14 kristaps 1000: advance(p, pos);
1.1 kristaps 1001:
1.18 kristaps 1002: stack = 0;
1.14 kristaps 1003: while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
1004: switch (BUF(p)[*pos]) {
1.1 kristaps 1005: case ('}'):
1.18 kristaps 1006: if (stack > 0) {
1007: stack--;
1008: advance(p, pos);
1009: texiputchar(p, '}');
1010: continue;
1011: }
1.14 kristaps 1012: advance(p, pos);
1.1 kristaps 1013: return;
1014: case ('{'):
1.18 kristaps 1015: if (dostack) {
1016: stack++;
1017: advance(p, pos);
1018: texiputchar(p, '{');
1019: continue;
1020: }
1.1 kristaps 1021: if (0 == p->ign)
1022: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1023: advance(p, pos);
1.1 kristaps 1024: continue;
1025: case ('@'):
1026: break;
1027: default:
1.14 kristaps 1028: parseword(p, pos, '\0');
1.1 kristaps 1029: continue;
1030: }
1031:
1.17 kristaps 1032: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1033: cmd = texicmd(p, *pos, &end, ¯o);
1034: advanceto(p, pos, end);
1.7 kristaps 1035: if (NULL != macro)
1.17 kristaps 1036: texiexecmacro(p, macro, sv, pos);
1.34 schwarze 1037: if (TEXICMD__MAX == cmd)
1.1 kristaps 1038: continue;
1039: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1040: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1041: }
1042: }
1043:
1044: /*
1045: * This should be invoked when we're on a macro line and want to process
1046: * to the end of the current input line, doing all of our macros along
1047: * the way.
1048: */
1049: void
1.14 kristaps 1050: parseeoln(struct texi *p, size_t *pos)
1.1 kristaps 1051: {
1.17 kristaps 1052: size_t end, sv;
1.7 kristaps 1053: enum texicmd cmd;
1054: struct teximacro *macro;
1.1 kristaps 1055:
1.14 kristaps 1056: while (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos]) {
1057: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) {
1.1 kristaps 1058: p->seenws = 1;
1059: if (p->literal)
1.14 kristaps 1060: texiputchar(p, BUF(p)[*pos]);
1061: advance(p, pos);
1.33 kristaps 1062: }
1063: if (*pos == BUFSZ(p)) {
1064: texiwarn(p, "unexpected EOF");
1065: return;
1.1 kristaps 1066: }
1.14 kristaps 1067: switch (BUF(p)[*pos]) {
1.1 kristaps 1068: case ('}'):
1069: if (0 == p->ign)
1070: texiwarn(p, "unexpected \"}\"");
1.14 kristaps 1071: advance(p, pos);
1.1 kristaps 1072: continue;
1073: case ('{'):
1074: if (0 == p->ign)
1075: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1076: advance(p, pos);
1.1 kristaps 1077: continue;
1.30 kristaps 1078: case ('\n'):
1079: continue;
1.1 kristaps 1080: case ('@'):
1081: break;
1082: default:
1.14 kristaps 1083: parseword(p, pos, '\0');
1.1 kristaps 1084: continue;
1085: }
1086:
1.17 kristaps 1087: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1088: cmd = texicmd(p, *pos, &end, ¯o);
1089: advanceto(p, pos, end);
1.7 kristaps 1090: if (NULL != macro)
1.17 kristaps 1091: texiexecmacro(p, macro, sv, pos);
1.34 schwarze 1092: if (TEXICMD__MAX == cmd)
1.1 kristaps 1093: continue;
1094: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1095: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1096: }
1.14 kristaps 1097:
1098: if (*pos < BUFSZ(p) && '\n' == BUF(p)[*pos])
1099: advance(p, pos);
1.19 kristaps 1100: }
1101:
1.30 kristaps 1102: enum texicmd
1103: peeklinecmd(const struct texi *p, size_t pos)
1104: {
1105: size_t end;
1106:
1107: while (pos < BUFSZ(p) && isws(BUF(p)[pos]))
1108: pos++;
1109: if (pos == BUFSZ(p) || '@' != BUF(p)[pos])
1110: return(TEXICMD__MAX);
1111: return(texicmd(p, pos, &end, NULL));
1112: }
1113:
1.19 kristaps 1114: /*
1115: * Peek to see if there's a command after subsequent whitespace.
1116: * If so, return the macro identifier.
1117: * This DOES NOT work with user-defined macros.
1118: */
1119: enum texicmd
1120: peekcmd(const struct texi *p, size_t pos)
1121: {
1122: size_t end;
1123:
1124: while (pos < BUFSZ(p) && ismspace(BUF(p)[pos]))
1125: pos++;
1126: if (pos == BUFSZ(p) || '@' != BUF(p)[pos])
1127: return(TEXICMD__MAX);
1128: return(texicmd(p, pos, &end, NULL));
1.1 kristaps 1129: }
1130:
1131: /*
1132: * Parse a single word or command.
1133: * This will return immediately at the EOF.
1134: */
1.32 kristaps 1135: void
1.14 kristaps 1136: parsesingle(struct texi *p, size_t *pos)
1.1 kristaps 1137: {
1.17 kristaps 1138: size_t end, sv;
1.7 kristaps 1139: enum texicmd cmd;
1140: struct teximacro *macro;
1.1 kristaps 1141:
1.14 kristaps 1142: if ((*pos = advancenext(p, pos)) >= BUFSZ(p))
1.1 kristaps 1143: return;
1144:
1.14 kristaps 1145: switch (BUF(p)[*pos]) {
1.1 kristaps 1146: case ('}'):
1147: if (0 == p->ign)
1148: texiwarn(p, "unexpected \"}\"");
1.14 kristaps 1149: advance(p, pos);
1.1 kristaps 1150: return;
1151: case ('{'):
1152: if (0 == p->ign)
1153: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1154: advance(p, pos);
1.1 kristaps 1155: return;
1156: case ('@'):
1157: break;
1158: default:
1.14 kristaps 1159: parseword(p, pos, '\0');
1.1 kristaps 1160: return;
1161: }
1162:
1.17 kristaps 1163: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1164: cmd = texicmd(p, *pos, &end, ¯o);
1165: advanceto(p, pos, end);
1.7 kristaps 1166: if (NULL != macro)
1.17 kristaps 1167: texiexecmacro(p, macro, sv, pos);
1.34 schwarze 1168: if (TEXICMD__MAX == cmd)
1.1 kristaps 1169: return;
1170: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1171: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1172: }
1173:
1174: /*
1175: * This is used in the @deffn type of command.
1176: * These have an arbitrary number of line arguments; however, these
1177: * arguments may or may not be surrounded by brackets.
1178: * In this function, we parse each one as either a bracketed or
1179: * non-bracketed argument, returning 0 when we've reached the end of
1180: * line or 1 otherwise.
1181: */
1182: int
1.14 kristaps 1183: parselinearg(struct texi *p, size_t *pos)
1.1 kristaps 1184: {
1185:
1.14 kristaps 1186: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) {
1.1 kristaps 1187: p->seenws = 1;
1.14 kristaps 1188: advance(p, pos);
1.1 kristaps 1189: }
1190:
1.14 kristaps 1191: if (*pos < BUFSZ(p) && '{' == BUF(p)[*pos])
1.18 kristaps 1192: parsebracket(p, pos, 0);
1.14 kristaps 1193: else if (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos])
1194: parsesingle(p, pos);
1.1 kristaps 1195: else
1196: return(0);
1197:
1198: return(1);
1199: }
1200:
/*
 * Parse the current buffer from its beginning to its end, one item
 * (word or command) at a time.
 */
static void
parseeof(struct texi *p)
{
	size_t	 pos;

	pos = 0;
	while (pos < BUFSZ(p))
		parsesingle(p, &pos);
}
1212:
1.8 kristaps 1213: void
1.21 kristaps 1214: texisplice(struct texi *p, const char *buf, size_t sz, size_t pos)
1.8 kristaps 1215: {
1.14 kristaps 1216: char *cp;
1217: struct texifile *f;
1.8 kristaps 1218:
1.14 kristaps 1219: assert(p->filepos > 0);
1220: f = &p->files[p->filepos - 1];
1.8 kristaps 1221:
1.14 kristaps 1222: if (f->mapsz + sz > f->mapmaxsz) {
1223: f->mapmaxsz = f->mapsz + sz + 1024;
1224: cp = realloc(f->map, f->mapmaxsz);
1225: if (NULL == cp)
1226: texiabort(p, NULL);
1227: f->map = cp;
1228: }
1.8 kristaps 1229:
1.15 kristaps 1230: f->insplice += sz;
1.21 kristaps 1231: memmove(f->map + pos + sz, f->map + pos, f->mapsz - pos);
1232: memcpy(f->map + pos, buf, sz);
1.14 kristaps 1233: f->mapsz += sz;
1.8 kristaps 1234: }
1235:
1236: /*
1.1 kristaps 1237: * Parse a block sequence until we have the "@end endtoken" command
1238: * invocation.
1239: * This will return immediately at EOF.
1240: */
1241: void
1.14 kristaps 1242: parseto(struct texi *p, size_t *pos, const char *endtoken)
1.1 kristaps 1243: {
1.17 kristaps 1244: size_t end, sv;
1.7 kristaps 1245: enum texicmd cmd;
1246: size_t endtoksz;
1247: struct teximacro *macro;
1.1 kristaps 1248:
1249: endtoksz = strlen(endtoken);
1250: assert(endtoksz > 0);
1.34 schwarze 1251:
1.14 kristaps 1252: while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
1253: switch (BUF(p)[*pos]) {
1.1 kristaps 1254: case ('}'):
1255: if (0 == p->ign)
1256: texiwarn(p, "unexpected \"}\"");
1.14 kristaps 1257: advance(p, pos);
1.1 kristaps 1258: continue;
1259: case ('{'):
1260: if (0 == p->ign)
1261: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1262: advance(p, pos);
1.1 kristaps 1263: continue;
1264: case ('@'):
1265: break;
1266: default:
1.14 kristaps 1267: parseword(p, pos, '\0');
1.1 kristaps 1268: continue;
1269: }
1270:
1.17 kristaps 1271: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1272: cmd = texicmd(p, *pos, &end, ¯o);
1273: advanceto(p, pos, end);
1.1 kristaps 1274: if (TEXICMD_END == cmd) {
1.14 kristaps 1275: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1276: advance(p, pos);
1.34 schwarze 1277: /*
1.1 kristaps 1278: * FIXME: check the full word, not just its
1279: * initial substring!
1280: */
1.14 kristaps 1281: if (BUFSZ(p) - *pos >= endtoksz && 0 == strncmp
1282: (&BUF(p)[*pos], endtoken, endtoksz)) {
1283: advanceeoln(p, pos, 0);
1.1 kristaps 1284: break;
1285: }
1286: if (0 == p->ign)
1287: texiwarn(p, "unexpected \"end\"");
1.14 kristaps 1288: advanceeoln(p, pos, 0);
1.1 kristaps 1289: continue;
1.34 schwarze 1290: }
1.7 kristaps 1291: if (NULL != macro)
1.17 kristaps 1292: texiexecmacro(p, macro, sv, pos);
1.34 schwarze 1293: if (TEXICMD__MAX == cmd)
1.7 kristaps 1294: continue;
1.34 schwarze 1295: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1296: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1297: }
1.30 kristaps 1298:
1.34 schwarze 1299: if (*pos == BUFSZ(p))
1.30 kristaps 1300: texiwarn(p, "EOF expecting \"%s\" end\n", endtoken);
1.1 kristaps 1301: }
1302:
1303: /*
1.12 kristaps 1304: * Like parsefile() but used for reading from stdandard input.
1305: * This can only be called for the first file!
1306: */
1307: void
1308: parsestdin(struct texi *p)
1309: {
1310: struct texifile *f;
1311: ssize_t ssz;
1312:
1313: assert(0 == p->filepos);
1314: f = &p->files[p->filepos];
1315: memset(f, 0, sizeof(struct texifile));
1316:
1317: f->type = TEXISRC_STDIN;
1318: f->name = "<stdin>";
1319:
1.14 kristaps 1320: for (f->mapsz = 0; ; f->mapsz += (size_t)ssz) {
1321: if (f->mapsz == f->mapmaxsz) {
1322: if (f->mapmaxsz == (1U << 31))
1.12 kristaps 1323: texierr(p, "stdin buffer too long");
1.34 schwarze 1324: f->mapmaxsz = f->mapmaxsz > 65536 / 2 ?
1.14 kristaps 1325: 2 * f->mapmaxsz : 65536;
1326: f->map = realloc(f->map, f->mapmaxsz);
1.34 schwarze 1327: if (NULL == f->map)
1.12 kristaps 1328: texiabort(p, NULL);
1329: }
1.34 schwarze 1330: ssz = read(STDIN_FILENO, f->map +
1.14 kristaps 1331: (int)f->mapsz, f->mapmaxsz - f->mapsz);
1.12 kristaps 1332: if (0 == ssz)
1333: break;
1.34 schwarze 1334: else if (-1 == ssz)
1.12 kristaps 1335: texiabort(p, NULL);
1336: }
1337:
1338: p->filepos++;
1.14 kristaps 1339: parseeof(p);
1.12 kristaps 1340: texifilepop(p);
1341: }
1342:
1343: /*
1.1 kristaps 1344: * Memory-map the file "fname" and begin parsing it unless "parse" is
1345: * zero, in which case we just dump the file to stdout (making sure it
1346: * doesn't trip up mdoc(7) along the way).
1347: * This can be called in a nested context.
1348: */
1349: void
1350: parsefile(struct texi *p, const char *fname, int parse)
1351: {
1352: struct texifile *f;
1353: int fd;
1354: struct stat st;
1355: size_t i;
1.14 kristaps 1356: char *map;
1.1 kristaps 1357:
1.34 schwarze 1358: if (64 == p->filepos)
1.6 kristaps 1359: texierr(p, "too many open files");
1.1 kristaps 1360: f = &p->files[p->filepos];
1361: memset(f, 0, sizeof(struct texifile));
1362:
1.12 kristaps 1363: f->type = TEXISRC_FILE;
1.1 kristaps 1364: f->name = fname;
1365: if (-1 == (fd = open(fname, O_RDONLY, 0))) {
1366: texiabort(p, fname);
1367: } else if (-1 == fstat(fd, &st)) {
1368: close(fd);
1369: texiabort(p, fname);
1.34 schwarze 1370: }
1.1 kristaps 1371:
1.14 kristaps 1372: f->mapsz = f->mapmaxsz = st.st_size;
1373: map = mmap(NULL, f->mapsz,
1.1 kristaps 1374: PROT_READ, MAP_SHARED, fd, 0);
1375: close(fd);
1376:
1.14 kristaps 1377: if (MAP_FAILED == map)
1.1 kristaps 1378: texiabort(p, fname);
1379:
1380: if ( ! parse) {
1.13 kristaps 1381: for (i = 0; i < f->mapsz; i++)
1.14 kristaps 1382: texiputchar(p, map[i]);
1.13 kristaps 1383: if (p->outcol)
1384: texiputchar(p, '\n');
1.14 kristaps 1385: munmap(map, f->mapsz);
1386: return;
1387: }
1388:
1389: p->filepos++;
1390: f->map = malloc(f->mapsz);
1391: memcpy(f->map, map, f->mapsz);
1392: munmap(map, f->mapsz);
1393: parseeof(p);
1.1 kristaps 1394: texifilepop(p);
1395: }
1396:
1.2 kristaps 1397: /*
1398: * Look up the value to a stored pair's value starting in "buf" from
1399: * start to end.
1400: * Return the pointer to the value memory, which can be NULL if the
1401: * pointer key does not exist.
1402: * The pointer can point to NULL if the value has been unset.
1403: */
1404: static char **
1.14 kristaps 1405: valuequery(const struct texi *p, size_t start, size_t end)
1.2 kristaps 1406: {
1407: size_t i, sz, len;
1408:
1409: assert(end >= start);
1410: /* Ignore zero-length. */
1411: if (0 == (len = (end - start)))
1412: return(NULL);
1413: for (i = 0; i < p->valsz; i++) {
1414: sz = strlen(p->vals[i].key);
1415: if (sz != len)
1416: continue;
1.14 kristaps 1417: if (0 == strncmp(p->vals[i].key, &BUF(p)[start], len))
1.2 kristaps 1418: return(&p->vals[i].value);
1419: }
1420: return(NULL);
1421: }
1422:
1423: /*
1424: * Parse a key until the end of line, e.g., @clear foo\n, and return the
1425: * pointer to its value via valuequery().
1426: */
1427: static char **
1.14 kristaps 1428: valuelquery(struct texi *p, size_t *pos)
1.2 kristaps 1429: {
1430: size_t start, end;
1431: char **ret;
1432:
1.14 kristaps 1433: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1434: advance(p, pos);
1435: if (*pos == BUFSZ(p))
1.2 kristaps 1436: return(NULL);
1.14 kristaps 1437: for (start = end = *pos; end < BUFSZ(p); end++)
1438: if ('\n' == BUF(p)[end])
1.2 kristaps 1439: break;
1.14 kristaps 1440: advanceto(p, pos, end);
1441: if (*pos < BUFSZ(p)) {
1442: assert('\n' == BUF(p)[*pos]);
1443: advance(p, pos);
1.2 kristaps 1444: }
1.14 kristaps 1445: if (NULL == (ret = valuequery(p, start, end)))
1.2 kristaps 1446: return(NULL);
1447: return(ret);
1448: }
1449:
/*
 * Unset the value whose key occupies the rest of the line: the stored
 * string is freed and its slot reset to NULL.
 * Nothing happens if the key cannot be found.
 */
void
valuelclear(struct texi *p, size_t *pos)
{
	char	**slot;

	slot = valuelquery(p, pos);
	if (NULL != slot) {
		free(*slot);
		*slot = NULL;
	}
}
1460:
/*
 * Return the value whose key occupies the rest of the line, or NULL
 * if the key is unknown or its value has been unset.
 */
const char *
valuellookup(struct texi *p, size_t *pos)
{
	char	**slot;

	slot = valuelquery(p, pos);
	return(NULL == slot ? NULL : *slot);
}
1470:
1471: /*
1472: * Parse a key from a bracketed string, e.g., @value{foo}, and return
1473: * the pointer to its value.
1474: * If the returned pointer is NULL, either there was no string within
1475: * the brackets (or no brackets), or the value was not found, or the
1476: * value had previously been unset.
1477: */
1478: const char *
1.14 kristaps 1479: valueblookup(struct texi *p, size_t *pos)
1.2 kristaps 1480: {
1481: size_t start, end;
1482: char **ret;
1483:
1.14 kristaps 1484: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1485: advance(p, pos);
1486: if (*pos == BUFSZ(p) || '{' != BUF(p)[*pos])
1.2 kristaps 1487: return(NULL);
1.14 kristaps 1488: advance(p, pos);
1489: for (start = end = *pos; end < BUFSZ(p); end++)
1490: if ('}' == BUF(p)[end])
1.2 kristaps 1491: break;
1.14 kristaps 1492: advanceto(p, pos, end);
1493: if (*pos < BUFSZ(p)) {
1494: assert('}' == BUF(p)[*pos]);
1495: advance(p, pos);
1.2 kristaps 1496: }
1.14 kristaps 1497: if (NULL == (ret = valuequery(p, start, end)))
1.2 kristaps 1498: return(NULL);
1499: return(*ret);
1500: }
1501:
1502: void
1503: valueadd(struct texi *p, char *key, char *val)
1504: {
1505: size_t i;
1506:
1507: assert(NULL != key);
1508: assert(NULL != val);
1509:
1510: for (i = 0; i < p->valsz; i++)
1511: if (0 == strcmp(p->vals[i].key, key))
1512: break;
1513:
1514: if (i < p->valsz) {
1515: free(key);
1516: free(p->vals[i].value);
1517: p->vals[i].value = val;
1518: } else {
1.4 kristaps 1519: /* FIXME: reallocarray() */
1.34 schwarze 1520: p->vals = realloc(p->vals,
1.2 kristaps 1521: (p->valsz + 1) *
1522: sizeof(struct texivalue));
1.4 kristaps 1523: if (NULL == p->vals)
1524: texiabort(p, NULL);
1.2 kristaps 1525: p->vals[p->valsz].key = key;
1526: p->vals[p->valsz].value = val;
1527: p->valsz++;
1528: }
1.7 kristaps 1529: }
1530:
1531: /*
1532: * Take the arguments to a macro, e.g., @foo{bar, baz, xyzzy} (or the
1533: * declaration form, @macro foo {arg1, ...}) and textually convert it to
1534: * an array of arguments of size "argsz".
1535: * These need to be freed individually and as a whole.
1536: * NOTE: this will puke on @, or @} macros, which can trick it into
1537: * stopping argument parsing earlier.
1538: * Ergo, textual: this doesn't interpret the arguments in any way.
1539: */
1540: char **
1.14 kristaps 1541: argparse(struct texi *p, size_t *pos, size_t *argsz, size_t hint)
1.7 kristaps 1542: {
1543: char **args;
1544: size_t start, end, stack;
1545:
1.14 kristaps 1546: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1547: advance(p, pos);
1.7 kristaps 1548:
1549: args = NULL;
1550: *argsz = 0;
1551:
1.17 kristaps 1552: if (*pos == BUFSZ(p))
1553: return(args);
1554:
1.14 kristaps 1555: if ('{' != BUF(p)[*pos] && hint) {
1.10 kristaps 1556: /*
1557: * Special case: if we encounter an unbracketed argument
1558: * and we're being invoked with non-zero arguments
1559: * (versus being set, i.e., hint>0), then parse until
1560: * the end of line.
1561: */
1562: *argsz = 1;
1563: args = calloc(1, sizeof(char *));
1564: if (NULL == args)
1565: texiabort(p, NULL);
1566: start = *pos;
1.14 kristaps 1567: while (*pos < BUFSZ(p)) {
1568: if ('\n' == BUF(p)[*pos])
1.10 kristaps 1569: break;
1.14 kristaps 1570: advance(p, pos);
1.10 kristaps 1571: }
1572: args[0] = malloc(*pos - start + 1);
1.14 kristaps 1573: memcpy(args[0], &BUF(p)[start], *pos - start);
1.10 kristaps 1574: args[0][*pos - start] = '\0';
1.14 kristaps 1575: if (*pos < BUFSZ(p) && '\n' == BUF(p)[*pos])
1576: advance(p, pos);
1.10 kristaps 1577: return(args);
1.14 kristaps 1578: } else if ('{' != BUF(p)[*pos])
1.7 kristaps 1579: return(args);
1.17 kristaps 1580:
1581: assert('{' == BUF(p)[*pos]);
1.7 kristaps 1582:
1583: /* Parse til the closing '}', putting into the array. */
1.14 kristaps 1584: advance(p, pos);
1585: while (*pos < BUFSZ(p)) {
1586: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1587: advance(p, pos);
1.7 kristaps 1588: start = *pos;
1589: stack = 0;
1.14 kristaps 1590: while (*pos < BUFSZ(p)) {
1.34 schwarze 1591: /*
1.7 kristaps 1592: * According to the manual, commas within
1593: * embedded commands are escaped.
1594: * We keep track of embedded-ness in the "stack"
1595: * state anyway, so this is free.
1596: */
1.14 kristaps 1597: if (',' == BUF(p)[*pos] && 0 == stack && 1 != hint)
1.7 kristaps 1598: break;
1.14 kristaps 1599: else if (0 == stack && '}' == BUF(p)[*pos])
1.7 kristaps 1600: break;
1.14 kristaps 1601: else if (0 != stack && '}' == BUF(p)[*pos])
1.7 kristaps 1602: stack--;
1.14 kristaps 1603: else if ('{' == BUF(p)[*pos])
1.7 kristaps 1604: stack++;
1.14 kristaps 1605: advance(p, pos);
1.7 kristaps 1606: }
1607: if (stack)
1608: texiwarn(p, "unterminated macro "
1609: "in macro arguments");
1.14 kristaps 1610: if ((end = *pos) == BUFSZ(p))
1.7 kristaps 1611: break;
1612: /* Test for zero-length '{ }'. */
1.14 kristaps 1613: if (start == end && '}' == BUF(p)[*pos] && 0 == *argsz)
1.7 kristaps 1614: break;
1615: /* FIXME: use reallocarray. */
1616: args = realloc
1617: (args, sizeof(char *) *
1618: (*argsz + 1));
1619: if (NULL == args)
1620: texiabort(p, NULL);
1621: args[*argsz] = malloc(end - start + 1);
1622: if (NULL == args[*argsz])
1623: texiabort(p, NULL);
1624: memcpy(args[*argsz],
1.14 kristaps 1625: &BUF(p)[start], end - start);
1.7 kristaps 1626: args[*argsz][end - start] = '\0';
1627: (*argsz)++;
1.14 kristaps 1628: if ('}' == BUF(p)[*pos])
1.7 kristaps 1629: break;
1.14 kristaps 1630: advance(p, pos);
1.7 kristaps 1631: }
1632:
1.34 schwarze 1633: if (*pos == BUFSZ(p))
1.7 kristaps 1634: texierr(p, "unterminated arguments");
1.14 kristaps 1635: assert('}' == BUF(p)[*pos]);
1636: advance(p, pos);
1.7 kristaps 1637: return(args);
1.2 kristaps 1638: }
1.20 kristaps 1639:
1640: /*
1641: * If we're printing chapters, then do some naviation here and then
1642: * close our outfile.
1643: * I want to call this the SEE ALSO section, but that's not really what
1644: * it is: we'll refer to the "initial" (top) node and the next and
1645: * previous chapters.
1646: */
1647: void
1648: teximdocclose(struct texi *p, int last)
1649: {
1650: char buf[PATH_MAX];
1651:
1.32 kristaps 1652: if (NULL == p->chapters || 1 == p->nodesz)
1.20 kristaps 1653: return;
1654:
1655: teximacro(p, "Sh INFO NAVIGATION");
1656:
1657: /* Print a reference to the "top" node. */
1.32 kristaps 1658: if (-1 != p->nodecache[p->nodecur].up) {
1.22 kristaps 1659: texiputchars(p, "Top node,");
1.34 schwarze 1660: snprintf(buf, sizeof(buf), "%s-%zd 7",
1.32 kristaps 1661: p->chapters, p->nodecache[p->nodecur].up);
1.31 kristaps 1662: p->seenvs = 0;
1.20 kristaps 1663: teximacroopen(p, "Xr ");
1664: texiputchars(p, buf);
1.22 kristaps 1665: texiputchars(p, " ;");
1.20 kristaps 1666: teximacroclose(p);
1667: }
1668:
1.32 kristaps 1669: if (-1 != p->nodecache[p->nodecur].prev) {
1.22 kristaps 1670: texiputchars(p, "previous node,");
1.34 schwarze 1671: snprintf(buf, sizeof(buf), "%s-%zd 7",
1.32 kristaps 1672: p->chapters, p->nodecache[p->nodecur].prev);
1.31 kristaps 1673: p->seenvs = 0;
1.20 kristaps 1674: teximacroopen(p, "Xr ");
1675: texiputchars(p, buf);
1.34 schwarze 1676: if ( ! last)
1.22 kristaps 1677: texiputchars(p, " ;");
1.20 kristaps 1678: teximacroclose(p);
1.34 schwarze 1679: }
1.20 kristaps 1680:
1.32 kristaps 1681: if (-1 != p->nodecache[p->nodecur].next) {
1682: texiputchars(p, "next node,");
1683: snprintf(buf, sizeof(buf), "%s-%zd 7",
1684: p->chapters, p->nodecache[p->nodecur].next);
1.31 kristaps 1685: p->seenvs = 0;
1.20 kristaps 1686: teximacroopen(p, "Xr ");
1687: texiputchars(p, buf);
1688: teximacroclose(p);
1689: }
1690:
1691: fclose(p->outfile);
1.32 kristaps 1692: p->outfile = NULL;
1693: }
1694:
1695: ssize_t
1696: texicache(struct texi *p, const char *buf, size_t sz)
1697: {
1698: size_t i;
1699:
1700: for (i = 0; i < p->nodecachesz; i++) {
1701: if (sz != strlen(p->nodecache[i].name))
1702: continue;
1703: if (strncmp(buf, p->nodecache[i].name, sz))
1704: continue;
1705: break;
1706: }
1707: if (i < p->nodecachesz)
1708: return(i);
1709: if (NULL == buf)
1710: return(-1);
1711: p->nodecache = realloc
1.34 schwarze 1712: (p->nodecache,
1.32 kristaps 1713: (p->nodecachesz + 1) * sizeof(struct texinode));
1714: if (NULL == p->nodecache)
1715: texiabort(p, NULL);
1716: p->nodecache[p->nodecachesz].name = malloc(sz + 1);
1717: if (NULL == p->nodecache[p->nodecachesz].name)
1718: texiabort(p, NULL);
1719: memcpy(p->nodecache[p->nodecachesz].name, buf, sz);
1720: p->nodecache[p->nodecachesz].name[sz] = '\0';
1721: p->nodecache[p->nodecachesz].up =
1722: p->nodecache[p->nodecachesz].next =
1723: p->nodecache[p->nodecachesz].prev = -1;
1724: p->nodecachesz++;
1725: return(p->nodecachesz - 1);
1.20 kristaps 1726: }
1727:
1728: /*
1.32 kristaps 1729: * Here we print our standard mdoc(7) prologue.
1730: * We use the title set with @settitle for the `Nd' description
1731: * and the source document filename (the first one as invoked on
1732: * the command line) for the title.
1.35 ! schwarze 1733: * The date is set to the modification time of the input.
1.20 kristaps 1734: */
1735: void
1.21 kristaps 1736: teximdocopen(struct texi *p, size_t *pos)
1.20 kristaps 1737: {
1738: const char *cp;
1739:
1.30 kristaps 1740: p->seenvs = -1;
1.20 kristaps 1741: teximacroopen(p, "Dd");
1.35 ! schwarze 1742: texiputchars(p, p->date);
1.20 kristaps 1743: teximacroclose(p);
1744: teximacroopen(p, "Dt");
1745: for (cp = p->title; '\0' != *cp; cp++)
1746: texiputchar(p, toupper((unsigned int)*cp));
1747: texiputchars(p, " 7");
1748: teximacroclose(p);
1749: teximacro(p, "Os");
1750: teximacro(p, "Sh NAME");
1751: teximacroopen(p, "Nm");
1752: for (cp = p->title; '\0' != *cp; cp++)
1753: texiputchar(p, *cp);
1754: teximacroclose(p);
1755: teximacroopen(p, "Nd");
1.21 kristaps 1756: /*
1757: * The subtitle `Nd' can consist of arbitrary macros, so paste
1758: * it and parse to the end of the line.
1759: */
1760: if (NULL != p->subtitle) {
1761: texisplice(p, p->subtitle, strlen(p->subtitle), *pos);
1762: parseeoln(p, pos);
1763: } else
1.20 kristaps 1764: texiputchars(p, "Unknown description");
1765: teximacroclose(p);
1766: }
1767:
CVSweb