Annotation of texi2mdoc/util.c, Revision 1.31
1.31 ! kristaps 1: /* $Id: util.c,v 1.30 2015/03/11 12:51:41 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2015 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/mman.h>
18: #include <sys/stat.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <limits.h>
24: #include <stdarg.h>
25: #include <stdio.h>
26: #include <stdlib.h>
27: #include <string.h>
28: #include <time.h>
29: #include <unistd.h>
30:
31: #include "extern.h"
32:
/*
 * Names of the mdoc(7) macros, terminated by a NULL sentinel.
 * Every entry ABSOLUTELY MUST be either two or three characters long:
 * the word-escaping logic that consults this table depends on it.
 */
static	const char *const mdocs[] = {
	"Ap",  "Dd",  "Dt",  "Os",  "Sh",  "Ss",  "Pp",  "D1",
	"Dl",  "Bd",  "Ed",  "Bl",  "El",  "It",  "Ad",  "An",
	"Ar",  "Cd",  "Cm",  "Dv",  "Er",  "Ev",  "Ex",  "Fa",
	"Fd",  "Fl",  "Fn",  "Ft",  "Ic",  "In",  "Li",  "Nd",
	"Nm",  "Op",  "Ot",  "Pa",  "Rv",  "St",  "Va",  "Vt",
	"Xr",  "%A",  "%B",  "%D",  "%I",  "%J",  "%N",  "%O",
	"%P",  "%R",  "%T",  "%V",  "Ac",  "Ao",  "Aq",  "At",
	"Bc",  "Bf",  "Bo",  "Bq",  "Bsx", "Bx",  "Db",  "Dc",
	"Do",  "Dq",  "Ec",  "Ef",  "Em",  "Eo",  "Fx",  "Ms",
	"No",  "Ns",  "Nx",  "Ox",  "Pc",  "Pf",  "Po",  "Pq",
	"Qc",  "Ql",  "Qo",  "Qq",  "Re",  "Rs",  "Sc",  "So",
	"Sq",  "Sm",  "Sx",  "Sy",  "Tn",  "Ux",  "Xc",  "Xo",
	"Fo",  "Fc",  "Oo",  "Oc",  "Bk",  "Ek",  "Bt",  "Hf",
	"Fr",  "Ud",  "Lb",  "Lp",  "Lk",  "Mt",  "Brq", "Bro",
	"Brc", "%C",  "Es",  "En",  "Dx",  "%Q",  "br",  "sp",
	"%U",  "Ta",  "ll",  NULL,
};
70:
71: /*
1.1 kristaps 72: * Unmap the top-most file in the stack of files currently opened (that
73: * is, nested calls to parsefile()).
74: */
75: void
76: texifilepop(struct texi *p)
77: {
78: struct texifile *f;
79:
80: assert(p->filepos > 0);
81: f = &p->files[--p->filepos];
1.14 kristaps 82: free(f->map);
1.1 kristaps 83: }
84:
1.7 kristaps 85: static void
86: teximacrofree(struct teximacro *p)
87: {
88: size_t i;
89:
90: for (i = 0; i < p->argsz; i++)
91: free(p->args[i]);
92:
93: free(p->args);
94: free(p->key);
95: free(p->value);
96: }
97:
98: static void
99: texivaluefree(struct texivalue *p)
100: {
101:
102: free(p->key);
103: free(p->value);
104: }
105:
1.30 kristaps 106: static void
107: texidex_free(struct texidex *p)
108: {
109: size_t i;
110:
111: for (i = 0; i < p->indexsz; i++)
1.31 ! kristaps 112: free(p->index[i].term);
1.30 kristaps 113:
114: free(p->index);
115: free(p->name);
116: p->index = NULL;
117: p->indexsz = 0;
118: }
119:
120: /*
121: * Add the text beginning at "index" and of "sz" bytes to the index
122: * named "tok" with name size "toksz".
123: * This will also output the necessary mdoc(7) to construct the index.
124: */
125: void
126: texindex(struct texi *p, const char *tok,
127: size_t toksz, const char *index, size_t sz)
128: {
1.31 ! kristaps 129: size_t i, isz;
1.30 kristaps 130: #ifdef HAVE_INDEX
131: char *cp;
132: #endif
133:
134: if (0 == sz) {
135: texiwarn(p, "zero-length index entry");
136: return;
137: }
138:
139: /* Look for the index. (Must be found.) */
140: for (i = 0; i < p->indexsz; i++) {
141: if (strlen(p->indexs[i].name) != toksz)
142: continue;
143: if (strncmp(p->indexs[i].name, tok, toksz))
144: continue;
145: break;
146: }
147:
148: assert(i < p->indexsz);
1.31 ! kristaps 149: isz = p->indexs[i].indexsz;
1.30 kristaps 150: /* Reallocate index's terms. */
151: p->indexs[i].index = realloc
152: (p->indexs[i].index,
1.31 ! kristaps 153: (isz + 1) * sizeof(struct texiterm));
1.30 kristaps 154: if (NULL == p->indexs[i].index)
155: texiabort(p, NULL);
156:
157: /* Add term to term array. */
1.31 ! kristaps 158: p->indexs[i].index[isz].chapter = p->nodesz - 1;
! 159: p->indexs[i].index[isz].term = malloc(sz + 1);
! 160: if (NULL == p->indexs[i].index[isz].term)
1.30 kristaps 161: texiabort(p, NULL);
1.31 ! kristaps 162: memcpy(p->indexs[i].index[isz].term, index, sz);
! 163: p->indexs[i].index[isz].term[sz] = '\0';
1.30 kristaps 164:
165: /* Output mdoc(7) for index. */
166: #ifdef HAVE_INDEX
167: p->seenvs = -1;
168: teximacroopen(p, "Ix");
169: texiputchars(p, "idx");
170: texiputchars(p, p->indexs[i].name);
1.31 ! kristaps 171: cp = p->indexs[i].index[isz].term;
1.30 kristaps 172: while ('\n' != *cp) {
173: assert('\0' != *cp);
174: texiputchar(p, *cp++);
175: }
176: teximacroclose(p);
177: #endif
178: p->indexs[i].indexsz++;
179: }
180:
181: /*
182: * Add an index entry named "tok" of length "sz".
183: * This usually consists of two letters, e.g., "cp" or "vr".
184: * This does nothing if the index exists or is zero-sized.
185: */
186: void
187: texindex_add(struct texi *p, const char *tok, size_t sz)
188: {
189: size_t i;
190: char *cp;
191:
192: if (0 == sz)
193: return;
194:
195: /* Make sure we don't have a duplicate. */
196: for (i = 0; i < p->indexsz; i++) {
197: if (strlen(p->indexs[i].name) != sz)
198: continue;
199: if (strncmp(p->indexs[i].name, tok, sz))
200: continue;
201: return;
202: }
203:
204: /* Reallocate indices. */
205: p->indexs = realloc(p->indexs,
1.31 ! kristaps 206: sizeof(struct texidex) *
! 207: (p->indexsz + 1));
1.30 kristaps 208: if (NULL == p->indexs)
209: texiabort(p, NULL);
210: if (NULL == (cp = malloc(sz + 1)))
211: texiabort(p, NULL);
212: memcpy(cp, tok, sz);
213: cp[sz] = '\0';
214: p->indexs[p->indexsz].name = cp;
215: p->indexs[p->indexsz].index = NULL;
216: p->indexs[p->indexsz].indexsz = 0;
217: p->indexsz++;
218: }
219:
1.1 kristaps 220: /*
221: * Unmap all files that we're currently using and free all resources
222: * that we've allocated during the parse.
223: * The utility should exit(...) after this is called.
224: */
225: void
226: texiexit(struct texi *p)
227: {
228: size_t i;
229:
230: /* Make sure we're newline-terminated. */
231: if (p->outcol)
1.20 kristaps 232: fputc('\n', p->outfile);
233: if (NULL != p->chapters)
234: teximdocclose(p, 1);
1.1 kristaps 235:
236: /* Unmap all files. */
237: while (p->filepos > 0)
238: texifilepop(p);
239:
1.7 kristaps 240: for (i = 0; i < p->macrosz; i++)
241: teximacrofree(&p->macros[i]);
1.1 kristaps 242: for (i = 0; i < p->dirsz; i++)
243: free(p->dirs[i]);
1.4 kristaps 244: for (i = 0; i < p->indexsz; i++)
1.30 kristaps 245: texidex_free(&p->indexs[i]);
1.7 kristaps 246: for (i = 0; i < p->valsz; i++)
247: texivaluefree(&p->vals[i]);
1.4 kristaps 248:
1.31 ! kristaps 249: free(p->nodes);
1.7 kristaps 250: free(p->macros);
1.1 kristaps 251: free(p->vals);
1.4 kristaps 252: free(p->indexs);
1.1 kristaps 253: free(p->dirs);
254: free(p->subtitle);
255: free(p->title);
1.26 kristaps 256: free(p->copying);
1.1 kristaps 257: }
258:
/*
 * Fatal system error.
 * Reports "errstring" through perror(3), releases all parser
 * resources with texiexit(), then terminates the process with
 * EXIT_FAILURE.
 * Does not return.
 */
void
texiabort(struct texi *p, const char *errstring)
{
	perror(errstring);
	texiexit(p);
	exit(EXIT_FAILURE);
}
271:
272: /*
273: * Print a generic warning message (to stderr) tied to our current
274: * location in the parse sequence.
275: */
276: void
277: texiwarn(const struct texi *p, const char *fmt, ...)
278: {
1.15 kristaps 279: va_list ap;
280: const struct texifile *f;
281:
282: f = &p->files[p->filepos - 1];
283:
284: if (f->insplice)
285: fprintf(stderr, "%s:%zu:%zu (%zuB left in splice): "
286: "warning: ", f->name, f->line + 1,
287: f->col + 1, f->insplice);
288: else
289: fprintf(stderr, "%s:%zu:%zu: warning: ",
290: f->name, f->line + 1, f->col + 1);
1.1 kristaps 291:
292: va_start(ap, fmt);
293: vfprintf(stderr, fmt, ap);
294: va_end(ap);
295: fputc('\n', stderr);
296: }
297:
298: /*
299: * Print an error message (to stderr) tied to our current location in
300: * the parse sequence, invoke texiexit(), then die.
301: */
302: void
303: texierr(struct texi *p, const char *fmt, ...)
304: {
1.15 kristaps 305: va_list ap;
306: struct texifile *f;
307:
308: f = &p->files[p->filepos - 1];
309:
310: if (f->insplice)
311: fprintf(stderr, "%s:%zu:%zu: (%zuB left in splice): "
312: "error: ", f->name, f->line + 1,
313: f->col + 1, f->insplice);
314: else
315: fprintf(stderr, "%s:%zu:%zu: error: ",
316: f->name, f->line + 1, f->col + 1);
1.1 kristaps 317:
318: va_start(ap, fmt);
319: vfprintf(stderr, fmt, ap);
320: va_end(ap);
321: fputc('\n', stderr);
322: texiexit(p);
323: exit(EXIT_FAILURE);
324: }
325:
326: /*
327: * Put a single data character to the output if we're not ignoring.
1.13 kristaps 328: * Escape starting a line with a control character and slashes.
1.1 kristaps 329: */
330: void
331: texiputchar(struct texi *p, char c)
332: {
333:
334: if (p->ign)
335: return;
336: if ('.' == c && 0 == p->outcol)
1.20 kristaps 337: fputs("\\&", p->outfile);
1.10 kristaps 338: if ('\'' == c && 0 == p->outcol)
1.20 kristaps 339: fputs("\\&", p->outfile);
1.1 kristaps 340:
1.23 kristaps 341: if (p->uppercase)
342: fputc(toupper((unsigned int)c), p->outfile);
343: else
344: fputc(c, p->outfile);
1.13 kristaps 345: if ('\\' == c)
1.20 kristaps 346: fputc('e', p->outfile);
1.1 kristaps 347: if ('\n' == c) {
348: p->outcol = 0;
349: p->seenws = 0;
350: } else
351: p->outcol++;
352: }
353:
354: /*
1.13 kristaps 355: * Put an opaque series of characters.
356: * Characters starting a line with a control character are escaped, but
357: * that's it, so don't use this for non-controlled sequences of text.
1.1 kristaps 358: */
359: void
360: texiputchars(struct texi *p, const char *s)
361: {
362:
1.13 kristaps 363: if (p->ign)
364: return;
365: if ('.' == *s && 0 == p->outcol)
1.20 kristaps 366: fputs("\\&", p->outfile);
1.13 kristaps 367: if ('\'' == *s && 0 == p->outcol)
1.20 kristaps 368: fputs("\\&", p->outfile);
1.23 kristaps 369: if (p->uppercase)
370: for ( ; '\0' != *s; s++)
371: p->outcol += fputc(toupper
372: ((unsigned int)*s), p->outfile);
373: else
374: p->outcol += fputs(s, p->outfile);
1.9 kristaps 375: }
376:
377: /*
378: * This puts all characters onto the output stream but makes sure to
379: * escape mdoc(7) slashes.
1.14 kristaps 380: * FIXME: useless.
1.9 kristaps 381: */
382: void
1.14 kristaps 383: texiputbuf(struct texi *p, size_t start, size_t end)
1.9 kristaps 384: {
385:
1.14 kristaps 386: for ( ; start < end; start++)
387: texiputchar(p, BUF(p)[start]);
1.1 kristaps 388: }
389:
390: /*
391: * Close an mdoc(7) macro opened with teximacroopen().
392: * If there are no more macros on the line, prints a newline.
393: */
394: void
395: teximacroclose(struct texi *p)
396: {
397:
1.30 kristaps 398: if (p->ign || p->literal|| TEXILIST_TABLE == p->list)
1.1 kristaps 399: return;
400:
401: if (0 == --p->outmacro) {
1.20 kristaps 402: fputc('\n', p->outfile);
1.1 kristaps 403: p->outcol = p->seenws = 0;
404: }
405: }
406:
407: /*
408: * Open a mdoc(7) macro.
409: * This is used for line macros, e.g., Qq [foo bar baz].
410: * It can be invoked for nested macros, e.g., Qq Li foo .
411: * TODO: flush-right punctuation (e.g., parenthesis).
412: */
413: void
414: teximacroopen(struct texi *p, const char *s)
415: {
416: int rc;
417:
1.30 kristaps 418: if (p->ign || p->literal|| TEXILIST_TABLE == p->list)
1.1 kristaps 419: return;
420:
421: if (p->outcol && 0 == p->outmacro) {
1.20 kristaps 422: fputc('\n', p->outfile);
1.1 kristaps 423: p->outcol = 0;
424: }
425:
1.30 kristaps 426: if (p->seenvs > 0 && 0 == p->outmacro)
427: fputs(".Pp\n", p->outfile);
428:
1.1 kristaps 429: if (0 == p->outmacro)
1.20 kristaps 430: fputc('.', p->outfile);
1.1 kristaps 431: else
1.20 kristaps 432: fputc(' ', p->outfile);
1.1 kristaps 433:
1.20 kristaps 434: if (EOF != (rc = fputs(s, p->outfile)))
1.1 kristaps 435: p->outcol += rc;
436:
1.20 kristaps 437: fputc(' ', p->outfile);
1.1 kristaps 438: p->outcol++;
439: p->outmacro++;
1.30 kristaps 440: p->seenws = p->seenvs = 0;
1.1 kristaps 441: }
442:
443: /*
444: * Put a stadnalone mdoc(7) command with the trailing newline.
445: */
446: void
447: teximacro(struct texi *p, const char *s)
448: {
449:
450: if (p->ign)
451: return;
452:
453: if (p->outmacro)
454: texierr(p, "\"%s\" in open line scope!?", s);
455: if (p->literal)
456: texierr(p, "\"%s\" in a literal scope!?", s);
457: if (p->outcol)
1.20 kristaps 458: fputc('\n', p->outfile);
1.30 kristaps 459: if (p->seenvs > 0)
460: fputs(".Pp\n", p->outfile);
1.1 kristaps 461:
1.20 kristaps 462: fputc('.', p->outfile);
463: fputs(s, p->outfile);
464: fputc('\n', p->outfile);
1.1 kristaps 465: p->outcol = p->seenws = 0;
466: }
467:
468: /*
469: * Introduce vertical space during normal (non-macro) input.
470: */
471: void
472: texivspace(struct texi *p)
473: {
474:
1.30 kristaps 475: if (TEXILIST_TABLE != p->list && p->seenvs >= 0)
476: p->seenvs = 1;
1.1 kristaps 477: }
478:
479: /*
480: * Advance by a single byte in the input stream, adjusting our location
481: * in the current input file.
482: */
483: void
1.14 kristaps 484: advance(struct texi *p, size_t *pos)
1.1 kristaps 485: {
1.15 kristaps 486: struct texifile *f;
1.1 kristaps 487:
1.15 kristaps 488: f = &p->files[p->filepos - 1];
489:
490: if (0 == f->insplice) {
491: if ('\n' == BUF(p)[*pos]) {
492: f->line++;
493: f->col = 0;
494: } else
495: f->col++;
1.17 kristaps 496: } else {
1.15 kristaps 497: --f->insplice;
1.17 kristaps 498: if (0 == f->insplice)
499: f->depth = 0;
500: }
1.1 kristaps 501:
502: (*pos)++;
503: }
504:
505: /*
506: * It's common to wait punctuation to float on the right side of macro
507: * lines in mdoc(7), e.g., ".Em hello ) ."
508: * This function does so, and should be called before teximacroclose().
509: * It will detect that it's the last in the nested macros and
510: * appropriately flush-left punctuation alongside the macro.
511: */
512: void
1.14 kristaps 513: texipunctuate(struct texi *p, size_t *pos)
1.1 kristaps 514: {
515: size_t start, end;
516:
517: if (1 != p->outmacro)
518: return;
519:
1.14 kristaps 520: for (start = end = *pos; end < BUFSZ(p); end++) {
521: switch (BUF(p)[end]) {
1.1 kristaps 522: case (','):
523: case (')'):
524: case ('.'):
525: case ('"'):
526: case (':'):
1.22 kristaps 527: case (';'):
1.1 kristaps 528: case ('!'):
529: case ('?'):
530: continue;
531: default:
532: break;
533: }
534: break;
535: }
536: if (end == *pos)
537: return;
1.14 kristaps 538: if (end + 1 == BUFSZ(p) || ' ' == BUF(p)[end] ||
1.30 kristaps 539: '@' == BUF(p)[end] || '\n' == BUF(p)[end]) {
1.1 kristaps 540: for ( ; start < end; start++) {
541: texiputchar(p, ' ');
1.14 kristaps 542: texiputchar(p, BUF(p)[start]);
543: advance(p, pos);
1.1 kristaps 544: }
545: }
546: }
547:
548: /*
549: * Advance to the next non-whitespace word in the input stream.
550: * If we're in literal mode, then print all of the whitespace as we're
551: * doing so.
552: */
553: static size_t
1.14 kristaps 554: advancenext(struct texi *p, size_t *pos)
1.1 kristaps 555: {
556:
557: if (p->literal) {
1.14 kristaps 558: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos])) {
559: texiputchar(p, BUF(p)[*pos]);
560: advance(p, pos);
1.1 kristaps 561: }
562: return(*pos);
563: }
564:
1.14 kristaps 565: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos])) {
1.1 kristaps 566: p->seenws = 1;
1.30 kristaps 567: if (0 == p->seenvs && '\n' == BUF(p)[*pos])
568: if (*pos + 1 < BUFSZ(p) && '\n' == BUF(p)[*pos + 1])
569: p->seenvs = 1;
1.14 kristaps 570: advance(p, pos);
1.1 kristaps 571: }
572: return(*pos);
573: }
574:
575: /*
576: * Advance to the EOLN in the input stream.
1.22 kristaps 577: * This will skip over '@' markers in an effort to ignore escaped
578: * newlines.
1.1 kristaps 579: */
580: size_t
1.14 kristaps 581: advanceeoln(struct texi *p, size_t *pos, int consumenl)
1.1 kristaps 582: {
583:
1.22 kristaps 584: while (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos]) {
585: if ('@' == BUF(p)[*pos])
586: advance(p, pos);
1.14 kristaps 587: advance(p, pos);
1.22 kristaps 588: }
1.14 kristaps 589: if (*pos < BUFSZ(p) && consumenl)
590: advance(p, pos);
1.1 kristaps 591: return(*pos);
592: }
593:
/*
 * Step forward, one byte at a time, until "pos" reaches "end".
 * "end" is an absolute position in the current buffer and must not be
 * behind the current position.
 */
void
advanceto(struct texi *p, size_t *pos, size_t end)
{
	assert(end >= *pos);

	for ( ; *pos < end; )
		advance(p, pos);
}
606:
1.7 kristaps 607: static void
1.17 kristaps 608: texiexecmacro(struct texi *p, struct teximacro *m, size_t sv, size_t *pos)
1.7 kristaps 609: {
1.11 kristaps 610: size_t valsz, realsz, aasz, asz,
611: ssz, i, j, k, start, end;
612: char *val;
613: char **args;
614: const char *cp;
1.7 kristaps 615:
1.17 kristaps 616: /* Disregard empty macros. */
1.22 kristaps 617: if (0 == (valsz = realsz = strlen(m->value))) {
618: args = argparse(p, pos, &asz, m->argsz);
619: for (i = 0; i < asz; i++)
620: free(args[i]);
621: free(args);
1.17 kristaps 622: return;
1.22 kristaps 623: }
1.17 kristaps 624:
625: /*
626: * This is important: it protect us from macros that invoke more
627: * macros, possibly going on infinitely.
628: * We use "sv" instead of the current position because we might
629: * be invoked at the end of the macro (i.e., insplice == 0).
630: * The "sv" value was initialised at the start of the macro.
631: */
632: if (sv > 0)
1.24 kristaps 633: if (++p->files[p->filepos - 1].depth > 64)
1.17 kristaps 634: texierr(p, "maximium recursive depth");
635:
1.14 kristaps 636: args = argparse(p, pos, &asz, m->argsz);
1.7 kristaps 637: if (asz != m->argsz)
638: texiwarn(p, "invalid macro argument length");
639: aasz = asz < m->argsz ? asz : m->argsz;
640:
641: if (0 == aasz) {
1.21 kristaps 642: texisplice(p, m->value, valsz, *pos);
1.7 kristaps 643: return;
644: }
645:
646: val = strdup(m->value);
647:
648: for (i = j = 0; i < realsz; i++) {
649: /* Parse blindly til the backslash delimiter. */
650: if ('\\' != m->value[i]) {
651: val[j++] = m->value[i];
652: val[j] = '\0';
653: continue;
654: } else if (i == realsz - 1)
655: texierr(p, "trailing argument name delimiter");
656:
657: /* Double-backslash is escaped. */
658: if ('\\' == m->value[i + 1]) {
659: val[j++] = m->value[i++];
660: val[j] = '\0';
661: continue;
662: }
663:
664: assert('\\' == m->value[i] && i < realsz - 1);
665:
666: /* Parse to terminating delimiter. */
667: /* FIXME: embedded, escaped delimiters? */
668: for (start = end = i + 1; end < realsz; end++)
669: if ('\\' == m->value[end])
670: break;
671: if (end == realsz)
672: texierr(p, "unterminated argument name");
673:
674: for (k = 0; k < aasz; k++) {
675: if ((ssz = strlen(m->args[k])) != (end - start))
676: continue;
677: if (strncmp(&m->value[start], m->args[k], ssz))
678: continue;
679: break;
680: }
681:
682: /*
683: * Argument didn't exist in argument table.
1.14 kristaps 684: * Just ignore it.
1.7 kristaps 685: */
686: if (k == aasz) {
1.14 kristaps 687: i = end;
1.7 kristaps 688: continue;
689: }
690:
691: if (strlen(args[k]) > ssz) {
692: valsz += strlen(args[k]);
693: val = realloc(val, valsz + 1);
694: if (NULL == val)
695: texiabort(p, NULL);
696: }
697:
1.11 kristaps 698: for (cp = args[k]; '\0' != *cp; cp++)
699: val[j++] = *cp;
700:
701: val[j] = '\0';
1.7 kristaps 702: i = end;
703: }
704:
1.21 kristaps 705: texisplice(p, val, strlen(val), *pos);
1.7 kristaps 706:
707: for (i = 0; i < asz; i++)
708: free(args[i]);
709: free(args);
710: free(val);
711: }
712:
1.1 kristaps 713: /*
714: * Output a free-form word in the input stream, progressing to the next
715: * command or white-space.
716: * This also will advance the input stream.
717: */
718: static void
1.14 kristaps 719: parseword(struct texi *p, size_t *pos, char extra)
1.1 kristaps 720: {
1.29 kristaps 721: size_t i, end, len;
722: int c;
1.1 kristaps 723:
1.25 kristaps 724: /*
1.27 kristaps 725: * If a prior word had a terminating double-newline, then begin
726: * this text block with a `Pp'.
727: * We don't do this if we're in a literal context (we'll print
728: * out the newlines themselves) nor in a `TS' table.
729: */
1.30 kristaps 730: if (p->seenvs > 0 && 0 == p->literal && TEXILIST_TABLE != p->list) {
731: if (p->outcol > 0)
732: fputc('\n', p->outfile);
733: fputs(".Pp\n", p->outfile);
734: p->outcol = 0;
735: }
1.27 kristaps 736:
737: /*
1.25 kristaps 738: * Some line control: if we (non-macro, non-literal) already
739: * have more than 72 characters written to the screen, then
740: * output a newline before getting started.
741: */
1.1 kristaps 742: if (p->seenws && 0 == p->outmacro &&
743: p->outcol > 72 && 0 == p->literal)
744: texiputchar(p, '\n');
1.25 kristaps 745:
746: /* Usual padding in the case of seen whitespace. */
1.1 kristaps 747: if (p->seenws && p->outcol && 0 == p->literal)
748: texiputchar(p, ' ');
749:
750: p->seenws = 0;
1.29 kristaps 751:
752: /*
753: * If we're in a macro line, we might want to print text that
754: * happens to be the same as an mdoc(7) macro.
755: * Obviously, we need to escape these words.
756: */
757: if (p->outmacro) {
758: end = *pos;
759: /* Read ahead to get the word length. */
760: while (end < BUFSZ(p) && ! ismspace(BUF(p)[end])) {
761: switch ((c = BUF(p)[end])) {
762: case ('@'):
763: case ('}'):
764: case ('{'):
765: break;
766: default:
767: if ('\0' != extra && extra == c)
768: break;
769: end++;
770: continue;
771: }
772: break;
773: }
774: len = end - *pos;
775: /* See if we have a match. */
776: for (i = 0; NULL != mdocs[i]; i++) {
777: /* All macros are 2 or three letters. */
778: if (len < 2 || len > 3)
779: continue;
780: /* Check the macro word length. */
781: if ('\0' == mdocs[i][2] && 2 != len)
782: continue;
783: else if ('\0' == mdocs[i][3] && 3 != len)
784: continue;
785: if (strncmp(mdocs[i], &BUF(p)[*pos], len))
786: continue;
787: texiputchars(p, "\\&");
788: break;
789: }
790: }
1.1 kristaps 791:
1.14 kristaps 792: while (*pos < BUFSZ(p) && ! ismspace(BUF(p)[*pos])) {
793: switch (BUF(p)[*pos]) {
1.1 kristaps 794: case ('@'):
795: case ('}'):
796: case ('{'):
797: return;
798: }
1.14 kristaps 799: if ('\0' != extra && BUF(p)[*pos] == extra)
1.1 kristaps 800: return;
1.28 kristaps 801:
802: if (p->literal) {
803: texiputchar(p, BUF(p)[*pos]);
804: advance(p, pos);
805: continue;
806: }
807:
1.30 kristaps 808: if ('"' == BUF(p)[*pos]) {
809: texiputchars(p, "\\(dq");
810: } else if (*pos < BUFSZ(p) - 2 &&
1.28 kristaps 811: '-' == BUF(p)[*pos] &&
812: '-' == BUF(p)[*pos + 1] &&
813: '-' == BUF(p)[*pos + 2]) {
814: texiputchars(p, "\\(em");
815: advance(p, pos);
816: advance(p, pos);
817: } else if (*pos < BUFSZ(p) - 1 &&
818: '-' == BUF(p)[*pos] &&
819: '-' == BUF(p)[*pos + 1]) {
820: texiputchars(p, "\\(en");
821: advance(p, pos);
822: } else if (*pos < BUFSZ(p) - 1 &&
1.14 kristaps 823: '`' == BUF(p)[*pos] &&
824: '`' == BUF(p)[*pos + 1]) {
1.1 kristaps 825: texiputchars(p, "\\(lq");
1.14 kristaps 826: advance(p, pos);
827: } else if (*pos < BUFSZ(p) - 1 &&
828: '\'' == BUF(p)[*pos] &&
829: '\'' == BUF(p)[*pos + 1]) {
1.1 kristaps 830: texiputchars(p, "\\(rq");
1.14 kristaps 831: advance(p, pos);
1.1 kristaps 832: } else
1.14 kristaps 833: texiputchar(p, BUF(p)[*pos]);
1.28 kristaps 834:
1.14 kristaps 835: advance(p, pos);
1.1 kristaps 836: }
1.25 kristaps 837:
838: /*
839: * New sentence, new line:if we (non-macro, non-literal) see a
840: * period at the end of the last printed word, then open a
841: * newline.
842: */
1.30 kristaps 843: if (0 == p->literal && 0 == p->outmacro && *pos < BUFSZ(p))
844: switch (BUF(p)[*pos - 1]) {
845: case ('.'):
846: case ('!'):
847: case ('?'):
848: texiputchar(p, '\n');
849: break;
850: default:
851: break;
852: }
853:
854: p->seenvs = 0;
1.1 kristaps 855: }
856:
857: /*
858: * Look up the command at position "pos" in the buffer, returning it (or
859: * TEXICMD__MAX if none found) and setting "end" to be the absolute
860: * index after the command name.
861: */
862: enum texicmd
1.19 kristaps 863: texicmd(const struct texi *p, size_t pos, size_t *end, struct teximacro **macro)
1.1 kristaps 864: {
1.4 kristaps 865: size_t i, len, toksz;
1.1 kristaps 866:
1.14 kristaps 867: assert('@' == BUF(p)[pos]);
1.1 kristaps 868:
1.7 kristaps 869: if (NULL != macro)
870: *macro = NULL;
871:
1.14 kristaps 872: if ((*end = pos) == BUFSZ(p))
1.1 kristaps 873: return(TEXICMD__MAX);
1.14 kristaps 874: else if ((*end = ++pos) == BUFSZ(p))
1.1 kristaps 875: return(TEXICMD__MAX);
876:
877: /* Alphabetic commands are special. */
1.23 kristaps 878: if ( ! isalpha((unsigned int)BUF(p)[pos])) {
1.14 kristaps 879: if ((*end = pos + 1) == BUFSZ(p))
1.1 kristaps 880: return(TEXICMD__MAX);
881: for (i = 0; i < TEXICMD__MAX; i++) {
882: if (1 != texitoks[i].len)
883: continue;
1.14 kristaps 884: if (0 == strncmp(texitoks[i].tok, &BUF(p)[pos], 1))
1.1 kristaps 885: return(i);
886: }
1.14 kristaps 887: texiwarn(p, "bad command: @%c", BUF(p)[pos]);
1.1 kristaps 888: return(TEXICMD__MAX);
889: }
890:
1.4 kristaps 891: /* Scan to the end of the possible command name. */
1.14 kristaps 892: for (*end = pos; *end < BUFSZ(p) && ! ismspace(BUF(p)[*end]); (*end)++)
893: if ((*end > pos && ('@' == BUF(p)[*end] ||
894: '{' == BUF(p)[*end] || '}' == BUF(p)[*end])))
1.1 kristaps 895: break;
896:
1.4 kristaps 897: /* Look for the command. */
1.1 kristaps 898: len = *end - pos;
899: for (i = 0; i < TEXICMD__MAX; i++) {
900: if (len != texitoks[i].len)
901: continue;
1.14 kristaps 902: if (0 == strncmp(texitoks[i].tok, &BUF(p)[pos], len))
1.1 kristaps 903: return(i);
904: }
905:
1.4 kristaps 906: /* Look for it in our indices. */
907: for (i = 0; i < p->indexsz; i++) {
1.30 kristaps 908: toksz = strlen(p->indexs[i].name);
1.4 kristaps 909: if (len != 5 + toksz)
910: continue;
1.30 kristaps 911: if (strncmp(&BUF(p)[pos], p->indexs[i].name, toksz))
1.4 kristaps 912: continue;
1.14 kristaps 913: if (0 == strncmp(&BUF(p)[pos + toksz], "index", 5))
1.7 kristaps 914: return(TEXICMD_USER_INDEX);
915: }
916:
917: for (i = 0; i < p->macrosz; i++) {
918: if (len != strlen(p->macros[i].key))
919: continue;
1.14 kristaps 920: if (strncmp(&BUF(p)[pos], p->macros[i].key, len))
1.7 kristaps 921: continue;
922: if (NULL != macro)
923: *macro = &p->macros[i];
924: return(TEXICMD__MAX);
1.4 kristaps 925: }
926:
1.14 kristaps 927: texiwarn(p, "bad command: @%.*s", (int)len, &BUF(p)[pos]);
1.1 kristaps 928: return(TEXICMD__MAX);
929: }
930:
931: /*
932: * Parse an argument from a bracketed command, e.g., @url{foo, baz}.
933: * Num should be set to the argument we're currently parsing, although
934: * it suffixes for it to be zero or non-zero.
935: * This will return 1 if there are more arguments, 0 otherwise.
936: * This will stop (returning 0) in the event of EOF or if we're not at a
937: * bracket for the zeroth parse.
938: */
939: int
1.14 kristaps 940: parsearg(struct texi *p, size_t *pos, size_t num)
1.1 kristaps 941: {
1.17 kristaps 942: size_t end, sv;
1.7 kristaps 943: enum texicmd cmd;
944: struct teximacro *macro;
1.1 kristaps 945:
1.14 kristaps 946: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos]))
947: advance(p, pos);
948: if (*pos == BUFSZ(p) || (0 == num && '{' != BUF(p)[*pos]))
1.1 kristaps 949: return(0);
950: if (0 == num)
1.14 kristaps 951: advance(p, pos);
1.1 kristaps 952:
1.14 kristaps 953: while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
954: switch (BUF(p)[*pos]) {
1.1 kristaps 955: case (','):
1.14 kristaps 956: advance(p, pos);
1.1 kristaps 957: return(1);
958: case ('}'):
1.14 kristaps 959: advance(p, pos);
1.1 kristaps 960: return(0);
961: case ('{'):
962: if (0 == p->ign)
963: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 964: advance(p, pos);
1.1 kristaps 965: continue;
966: case ('@'):
967: break;
968: default:
1.14 kristaps 969: parseword(p, pos, ',');
1.1 kristaps 970: continue;
971: }
972:
1.17 kristaps 973: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 974: cmd = texicmd(p, *pos, &end, ¯o);
975: advanceto(p, pos, end);
1.7 kristaps 976: if (NULL != macro)
1.17 kristaps 977: texiexecmacro(p, macro, sv, pos);
1.1 kristaps 978: if (TEXICMD__MAX == cmd)
979: continue;
980: if (NULL != texitoks[cmd].fp)
1.14 kristaps 981: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 982: }
983: return(0);
984: }
985:
986: /*
987: * Parse until the end of a bracketed statement, e.g., @foo{bar baz}.
988: * This will stop in the event of EOF or if we're not at a bracket.
989: */
990: void
1.18 kristaps 991: parsebracket(struct texi *p, size_t *pos, int dostack)
1.1 kristaps 992: {
1.18 kristaps 993: size_t end, sv, stack;
1.7 kristaps 994: enum texicmd cmd;
995: struct teximacro *macro;
1.1 kristaps 996:
1.14 kristaps 997: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos]))
998: advance(p, pos);
1.1 kristaps 999:
1.14 kristaps 1000: if (*pos == BUFSZ(p) || '{' != BUF(p)[*pos])
1.1 kristaps 1001: return;
1.14 kristaps 1002: advance(p, pos);
1.1 kristaps 1003:
1.18 kristaps 1004: stack = 0;
1.14 kristaps 1005: while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
1006: switch (BUF(p)[*pos]) {
1.1 kristaps 1007: case ('}'):
1.18 kristaps 1008: if (stack > 0) {
1009: stack--;
1010: advance(p, pos);
1011: texiputchar(p, '}');
1012: continue;
1013: }
1.14 kristaps 1014: advance(p, pos);
1.1 kristaps 1015: return;
1016: case ('{'):
1.18 kristaps 1017: if (dostack) {
1018: stack++;
1019: advance(p, pos);
1020: texiputchar(p, '{');
1021: continue;
1022: }
1.1 kristaps 1023: if (0 == p->ign)
1024: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1025: advance(p, pos);
1.1 kristaps 1026: continue;
1027: case ('@'):
1028: break;
1029: default:
1.14 kristaps 1030: parseword(p, pos, '\0');
1.1 kristaps 1031: continue;
1032: }
1033:
1.17 kristaps 1034: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1035: cmd = texicmd(p, *pos, &end, ¯o);
1036: advanceto(p, pos, end);
1.7 kristaps 1037: if (NULL != macro)
1.17 kristaps 1038: texiexecmacro(p, macro, sv, pos);
1.1 kristaps 1039: if (TEXICMD__MAX == cmd)
1040: continue;
1041: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1042: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1043: }
1044: }
1045:
1046: /*
1047: * This should be invoked when we're on a macro line and want to process
1048: * to the end of the current input line, doing all of our macros along
1049: * the way.
1050: */
1051: void
1.14 kristaps 1052: parseeoln(struct texi *p, size_t *pos)
1.1 kristaps 1053: {
1.17 kristaps 1054: size_t end, sv;
1.7 kristaps 1055: enum texicmd cmd;
1056: struct teximacro *macro;
1.1 kristaps 1057:
1.14 kristaps 1058: while (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos]) {
1059: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) {
1.1 kristaps 1060: p->seenws = 1;
1061: if (p->literal)
1.14 kristaps 1062: texiputchar(p, BUF(p)[*pos]);
1063: advance(p, pos);
1.1 kristaps 1064: }
1.14 kristaps 1065: switch (BUF(p)[*pos]) {
1.1 kristaps 1066: case ('}'):
1067: if (0 == p->ign)
1068: texiwarn(p, "unexpected \"}\"");
1.14 kristaps 1069: advance(p, pos);
1.1 kristaps 1070: continue;
1071: case ('{'):
1072: if (0 == p->ign)
1073: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1074: advance(p, pos);
1.1 kristaps 1075: continue;
1.30 kristaps 1076: case ('\n'):
1077: continue;
1.1 kristaps 1078: case ('@'):
1079: break;
1080: default:
1.14 kristaps 1081: parseword(p, pos, '\0');
1.1 kristaps 1082: continue;
1083: }
1084:
1.17 kristaps 1085: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1086: cmd = texicmd(p, *pos, &end, ¯o);
1087: advanceto(p, pos, end);
1.7 kristaps 1088: if (NULL != macro)
1.17 kristaps 1089: texiexecmacro(p, macro, sv, pos);
1.1 kristaps 1090: if (TEXICMD__MAX == cmd)
1091: continue;
1092: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1093: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1094: }
1.14 kristaps 1095:
1096: if (*pos < BUFSZ(p) && '\n' == BUF(p)[*pos])
1097: advance(p, pos);
1.19 kristaps 1098: }
1099:
1.30 kristaps 1100: enum texicmd
1101: peeklinecmd(const struct texi *p, size_t pos)
1102: {
1103: size_t end;
1104:
1105: while (pos < BUFSZ(p) && isws(BUF(p)[pos]))
1106: pos++;
1107: if (pos == BUFSZ(p) || '@' != BUF(p)[pos])
1108: return(TEXICMD__MAX);
1109: return(texicmd(p, pos, &end, NULL));
1110: }
1111:
1.19 kristaps 1112: /*
1113: * Peek to see if there's a command after subsequent whitespace.
1114: * If so, return the macro identifier.
1115: * This DOES NOT work with user-defined macros.
1116: */
1117: enum texicmd
1118: peekcmd(const struct texi *p, size_t pos)
1119: {
1120: size_t end;
1121:
1122: while (pos < BUFSZ(p) && ismspace(BUF(p)[pos]))
1123: pos++;
1124: if (pos == BUFSZ(p) || '@' != BUF(p)[pos])
1125: return(TEXICMD__MAX);
1126: return(texicmd(p, pos, &end, NULL));
1.1 kristaps 1127: }
1128:
1129: /*
1130: * Parse a single word or command.
1131: * This will return immediately at the EOF.
1132: */
1.14 kristaps 1133: static void
1134: parsesingle(struct texi *p, size_t *pos)
1.1 kristaps 1135: {
1.17 kristaps 1136: size_t end, sv;
1.7 kristaps 1137: enum texicmd cmd;
1138: struct teximacro *macro;
1.1 kristaps 1139:
1.14 kristaps 1140: if ((*pos = advancenext(p, pos)) >= BUFSZ(p))
1.1 kristaps 1141: return;
1142:
1.14 kristaps 1143: switch (BUF(p)[*pos]) {
1.1 kristaps 1144: case ('}'):
1145: if (0 == p->ign)
1146: texiwarn(p, "unexpected \"}\"");
1.14 kristaps 1147: advance(p, pos);
1.1 kristaps 1148: return;
1149: case ('{'):
1150: if (0 == p->ign)
1151: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1152: advance(p, pos);
1.1 kristaps 1153: return;
1154: case ('@'):
1155: break;
1156: default:
1.14 kristaps 1157: parseword(p, pos, '\0');
1.1 kristaps 1158: return;
1159: }
1160:
1.17 kristaps 1161: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1162: cmd = texicmd(p, *pos, &end, ¯o);
1163: advanceto(p, pos, end);
1.7 kristaps 1164: if (NULL != macro)
1.17 kristaps 1165: texiexecmacro(p, macro, sv, pos);
1.1 kristaps 1166: if (TEXICMD__MAX == cmd)
1167: return;
1168: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1169: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1170: }
1171:
1172: /*
1173: * This is used in the @deffn type of command.
1174: * These have an arbitrary number of line arguments; however, these
1175: * arguments may or may not be surrounded by brackets.
1176: * In this function, we parse each one as either a bracketed or
1177: * non-bracketed argument, returning 0 when we've reached the end of
1178: * line or 1 otherwise.
1179: */
1180: int
1.14 kristaps 1181: parselinearg(struct texi *p, size_t *pos)
1.1 kristaps 1182: {
1183:
1.14 kristaps 1184: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) {
1.1 kristaps 1185: p->seenws = 1;
1.14 kristaps 1186: advance(p, pos);
1.1 kristaps 1187: }
1188:
1.14 kristaps 1189: if (*pos < BUFSZ(p) && '{' == BUF(p)[*pos])
1.18 kristaps 1190: parsebracket(p, pos, 0);
1.14 kristaps 1191: else if (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos])
1192: parsesingle(p, pos);
1.1 kristaps 1193: else
1194: return(0);
1195:
1196: return(1);
1197: }
1198:
/*
 * Parse the current buffer from its first byte to its last, one word
 * or command at a time.
 */
static void
parseeof(struct texi *p)
{
	size_t		 pos = 0;

	while (pos < BUFSZ(p))
		parsesingle(p, &pos);
}
1210:
1.8 kristaps 1211: void
1.21 kristaps 1212: texisplice(struct texi *p, const char *buf, size_t sz, size_t pos)
1.8 kristaps 1213: {
1.14 kristaps 1214: char *cp;
1215: struct texifile *f;
1.8 kristaps 1216:
1.14 kristaps 1217: assert(p->filepos > 0);
1218: f = &p->files[p->filepos - 1];
1.8 kristaps 1219:
1.14 kristaps 1220: if (f->mapsz + sz > f->mapmaxsz) {
1221: f->mapmaxsz = f->mapsz + sz + 1024;
1222: cp = realloc(f->map, f->mapmaxsz);
1223: if (NULL == cp)
1224: texiabort(p, NULL);
1225: f->map = cp;
1226: }
1.8 kristaps 1227:
1.15 kristaps 1228: f->insplice += sz;
1.21 kristaps 1229: memmove(f->map + pos + sz, f->map + pos, f->mapsz - pos);
1230: memcpy(f->map + pos, buf, sz);
1.14 kristaps 1231: f->mapsz += sz;
1.8 kristaps 1232: }
1233:
1234: /*
1.1 kristaps 1235: * Parse a block sequence until we have the "@end endtoken" command
1236: * invocation.
1237: * This will return immediately at EOF.
1238: */
1239: void
1.14 kristaps 1240: parseto(struct texi *p, size_t *pos, const char *endtoken)
1.1 kristaps 1241: {
1.17 kristaps 1242: size_t end, sv;
1.7 kristaps 1243: enum texicmd cmd;
1244: size_t endtoksz;
1245: struct teximacro *macro;
1.1 kristaps 1246:
1247: endtoksz = strlen(endtoken);
1248: assert(endtoksz > 0);
1249:
1.14 kristaps 1250: while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
1251: switch (BUF(p)[*pos]) {
1.1 kristaps 1252: case ('}'):
1253: if (0 == p->ign)
1254: texiwarn(p, "unexpected \"}\"");
1.14 kristaps 1255: advance(p, pos);
1.1 kristaps 1256: continue;
1257: case ('{'):
1258: if (0 == p->ign)
1259: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1260: advance(p, pos);
1.1 kristaps 1261: continue;
1262: case ('@'):
1263: break;
1264: default:
1.14 kristaps 1265: parseword(p, pos, '\0');
1.1 kristaps 1266: continue;
1267: }
1268:
1.17 kristaps 1269: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1270: cmd = texicmd(p, *pos, &end, ¯o);
1271: advanceto(p, pos, end);
1.1 kristaps 1272: if (TEXICMD_END == cmd) {
1.14 kristaps 1273: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1274: advance(p, pos);
1.1 kristaps 1275: /*
1276: * FIXME: check the full word, not just its
1277: * initial substring!
1278: */
1.14 kristaps 1279: if (BUFSZ(p) - *pos >= endtoksz && 0 == strncmp
1280: (&BUF(p)[*pos], endtoken, endtoksz)) {
1281: advanceeoln(p, pos, 0);
1.1 kristaps 1282: break;
1283: }
1284: if (0 == p->ign)
1285: texiwarn(p, "unexpected \"end\"");
1.14 kristaps 1286: advanceeoln(p, pos, 0);
1.1 kristaps 1287: continue;
1.7 kristaps 1288: }
1289: if (NULL != macro)
1.17 kristaps 1290: texiexecmacro(p, macro, sv, pos);
1.7 kristaps 1291: if (TEXICMD__MAX == cmd)
1292: continue;
1293: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1294: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1295: }
1.30 kristaps 1296:
1297: if (*pos == BUFSZ(p))
1298: texiwarn(p, "EOF expecting \"%s\" end\n", endtoken);
1.1 kristaps 1299: }
1300:
1301: /*
1.12 kristaps 1302: * Like parsefile() but used for reading from stdandard input.
1303: * This can only be called for the first file!
1304: */
1305: void
1306: parsestdin(struct texi *p)
1307: {
1308: struct texifile *f;
1309: ssize_t ssz;
1310:
1311: assert(0 == p->filepos);
1312: f = &p->files[p->filepos];
1313: memset(f, 0, sizeof(struct texifile));
1314:
1315: f->type = TEXISRC_STDIN;
1316: f->name = "<stdin>";
1317:
1.14 kristaps 1318: for (f->mapsz = 0; ; f->mapsz += (size_t)ssz) {
1319: if (f->mapsz == f->mapmaxsz) {
1320: if (f->mapmaxsz == (1U << 31))
1.12 kristaps 1321: texierr(p, "stdin buffer too long");
1.14 kristaps 1322: f->mapmaxsz = f->mapmaxsz > 65536 / 2 ?
1323: 2 * f->mapmaxsz : 65536;
1324: f->map = realloc(f->map, f->mapmaxsz);
1.12 kristaps 1325: if (NULL == f->map)
1326: texiabort(p, NULL);
1327: }
1.14 kristaps 1328: ssz = read(STDIN_FILENO, f->map +
1329: (int)f->mapsz, f->mapmaxsz - f->mapsz);
1.12 kristaps 1330: if (0 == ssz)
1331: break;
1332: else if (-1 == ssz)
1333: texiabort(p, NULL);
1334: }
1335:
1336: p->filepos++;
1.14 kristaps 1337: parseeof(p);
1.12 kristaps 1338: texifilepop(p);
1339: }
1340:
1341: /*
1.1 kristaps 1342: * Memory-map the file "fname" and begin parsing it unless "parse" is
1343: * zero, in which case we just dump the file to stdout (making sure it
1344: * doesn't trip up mdoc(7) along the way).
1345: * This can be called in a nested context.
1346: */
1347: void
1348: parsefile(struct texi *p, const char *fname, int parse)
1349: {
1350: struct texifile *f;
1351: int fd;
1352: struct stat st;
1353: size_t i;
1.14 kristaps 1354: char *map;
1.1 kristaps 1355:
1.5 kristaps 1356: if (64 == p->filepos)
1.6 kristaps 1357: texierr(p, "too many open files");
1.1 kristaps 1358: f = &p->files[p->filepos];
1359: memset(f, 0, sizeof(struct texifile));
1360:
1.12 kristaps 1361: f->type = TEXISRC_FILE;
1.1 kristaps 1362: f->name = fname;
1363: if (-1 == (fd = open(fname, O_RDONLY, 0))) {
1364: texiabort(p, fname);
1365: } else if (-1 == fstat(fd, &st)) {
1366: close(fd);
1367: texiabort(p, fname);
1368: }
1369:
1.14 kristaps 1370: f->mapsz = f->mapmaxsz = st.st_size;
1371: map = mmap(NULL, f->mapsz,
1.1 kristaps 1372: PROT_READ, MAP_SHARED, fd, 0);
1373: close(fd);
1374:
1.14 kristaps 1375: if (MAP_FAILED == map)
1.1 kristaps 1376: texiabort(p, fname);
1377:
1378: if ( ! parse) {
1.13 kristaps 1379: for (i = 0; i < f->mapsz; i++)
1.14 kristaps 1380: texiputchar(p, map[i]);
1.13 kristaps 1381: if (p->outcol)
1382: texiputchar(p, '\n');
1.14 kristaps 1383: munmap(map, f->mapsz);
1384: return;
1385: }
1386:
1387: p->filepos++;
1388: f->map = malloc(f->mapsz);
1389: memcpy(f->map, map, f->mapsz);
1390: munmap(map, f->mapsz);
1391: parseeof(p);
1.1 kristaps 1392: texifilepop(p);
1393: }
1394:
1.2 kristaps 1395: /*
1396: * Look up the value to a stored pair's value starting in "buf" from
1397: * start to end.
1398: * Return the pointer to the value memory, which can be NULL if the
1399: * pointer key does not exist.
1400: * The pointer can point to NULL if the value has been unset.
1401: */
1402: static char **
1.14 kristaps 1403: valuequery(const struct texi *p, size_t start, size_t end)
1.2 kristaps 1404: {
1405: size_t i, sz, len;
1406:
1407: assert(end >= start);
1408: /* Ignore zero-length. */
1409: if (0 == (len = (end - start)))
1410: return(NULL);
1411: for (i = 0; i < p->valsz; i++) {
1412: sz = strlen(p->vals[i].key);
1413: if (sz != len)
1414: continue;
1.14 kristaps 1415: if (0 == strncmp(p->vals[i].key, &BUF(p)[start], len))
1.2 kristaps 1416: return(&p->vals[i].value);
1417: }
1418: return(NULL);
1419: }
1420:
1421: /*
1422: * Parse a key until the end of line, e.g., @clear foo\n, and return the
1423: * pointer to its value via valuequery().
1424: */
1425: static char **
1.14 kristaps 1426: valuelquery(struct texi *p, size_t *pos)
1.2 kristaps 1427: {
1428: size_t start, end;
1429: char **ret;
1430:
1.14 kristaps 1431: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1432: advance(p, pos);
1433: if (*pos == BUFSZ(p))
1.2 kristaps 1434: return(NULL);
1.14 kristaps 1435: for (start = end = *pos; end < BUFSZ(p); end++)
1436: if ('\n' == BUF(p)[end])
1.2 kristaps 1437: break;
1.14 kristaps 1438: advanceto(p, pos, end);
1439: if (*pos < BUFSZ(p)) {
1440: assert('\n' == BUF(p)[*pos]);
1441: advance(p, pos);
1.2 kristaps 1442: }
1.14 kristaps 1443: if (NULL == (ret = valuequery(p, start, end)))
1.2 kristaps 1444: return(NULL);
1445: return(ret);
1446: }
1447:
/*
 * Unset the value whose key occupies the rest of the current line:
 * the stored string is freed and its slot set to NULL.
 * Does nothing if valuelquery() finds no such key.
 */
void
valuelclear(struct texi *p, size_t *pos)
{
	char		**slot;

	slot = valuelquery(p, pos);
	if (slot != NULL) {
		free(*slot);
		*slot = NULL;
	}
}
1458:
/*
 * Return the value whose key occupies the rest of the current line.
 * The result is NULL if valuelquery() finds no such key or if the
 * value has been unset.
 */
const char *
valuellookup(struct texi *p, size_t *pos)
{
	char		**slot;

	slot = valuelquery(p, pos);
	return(slot == NULL ? NULL : *slot);
}
1468:
1469: /*
1470: * Parse a key from a bracketed string, e.g., @value{foo}, and return
1471: * the pointer to its value.
1472: * If the returned pointer is NULL, either there was no string within
1473: * the brackets (or no brackets), or the value was not found, or the
1474: * value had previously been unset.
1475: */
1476: const char *
1.14 kristaps 1477: valueblookup(struct texi *p, size_t *pos)
1.2 kristaps 1478: {
1479: size_t start, end;
1480: char **ret;
1481:
1.14 kristaps 1482: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1483: advance(p, pos);
1484: if (*pos == BUFSZ(p) || '{' != BUF(p)[*pos])
1.2 kristaps 1485: return(NULL);
1.14 kristaps 1486: advance(p, pos);
1487: for (start = end = *pos; end < BUFSZ(p); end++)
1488: if ('}' == BUF(p)[end])
1.2 kristaps 1489: break;
1.14 kristaps 1490: advanceto(p, pos, end);
1491: if (*pos < BUFSZ(p)) {
1492: assert('}' == BUF(p)[*pos]);
1493: advance(p, pos);
1.2 kristaps 1494: }
1.14 kristaps 1495: if (NULL == (ret = valuequery(p, start, end)))
1.2 kristaps 1496: return(NULL);
1497: return(*ret);
1498: }
1499:
1500: void
1501: valueadd(struct texi *p, char *key, char *val)
1502: {
1503: size_t i;
1504:
1505: assert(NULL != key);
1506: assert(NULL != val);
1507:
1508: for (i = 0; i < p->valsz; i++)
1509: if (0 == strcmp(p->vals[i].key, key))
1510: break;
1511:
1512: if (i < p->valsz) {
1513: free(key);
1514: free(p->vals[i].value);
1515: p->vals[i].value = val;
1516: } else {
1.4 kristaps 1517: /* FIXME: reallocarray() */
1.2 kristaps 1518: p->vals = realloc(p->vals,
1519: (p->valsz + 1) *
1520: sizeof(struct texivalue));
1.4 kristaps 1521: if (NULL == p->vals)
1522: texiabort(p, NULL);
1.2 kristaps 1523: p->vals[p->valsz].key = key;
1524: p->vals[p->valsz].value = val;
1525: p->valsz++;
1526: }
1.7 kristaps 1527: }
1528:
1529: /*
1530: * Take the arguments to a macro, e.g., @foo{bar, baz, xyzzy} (or the
1531: * declaration form, @macro foo {arg1, ...}) and textually convert it to
1532: * an array of arguments of size "argsz".
1533: * These need to be freed individually and as a whole.
1534: * NOTE: this will puke on @, or @} macros, which can trick it into
1535: * stopping argument parsing earlier.
1536: * Ergo, textual: this doesn't interpret the arguments in any way.
1537: */
1538: char **
1.14 kristaps 1539: argparse(struct texi *p, size_t *pos, size_t *argsz, size_t hint)
1.7 kristaps 1540: {
1541: char **args;
1542: size_t start, end, stack;
1543:
1.14 kristaps 1544: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1545: advance(p, pos);
1.7 kristaps 1546:
1547: args = NULL;
1548: *argsz = 0;
1549:
1.17 kristaps 1550: if (*pos == BUFSZ(p))
1551: return(args);
1552:
1.14 kristaps 1553: if ('{' != BUF(p)[*pos] && hint) {
1.10 kristaps 1554: /*
1555: * Special case: if we encounter an unbracketed argument
1556: * and we're being invoked with non-zero arguments
1557: * (versus being set, i.e., hint>0), then parse until
1558: * the end of line.
1559: */
1560: *argsz = 1;
1561: args = calloc(1, sizeof(char *));
1562: if (NULL == args)
1563: texiabort(p, NULL);
1564: start = *pos;
1.14 kristaps 1565: while (*pos < BUFSZ(p)) {
1566: if ('\n' == BUF(p)[*pos])
1.10 kristaps 1567: break;
1.14 kristaps 1568: advance(p, pos);
1.10 kristaps 1569: }
1570: args[0] = malloc(*pos - start + 1);
1.14 kristaps 1571: memcpy(args[0], &BUF(p)[start], *pos - start);
1.10 kristaps 1572: args[0][*pos - start] = '\0';
1.14 kristaps 1573: if (*pos < BUFSZ(p) && '\n' == BUF(p)[*pos])
1574: advance(p, pos);
1.10 kristaps 1575: return(args);
1.14 kristaps 1576: } else if ('{' != BUF(p)[*pos])
1.7 kristaps 1577: return(args);
1.17 kristaps 1578:
1579: assert('{' == BUF(p)[*pos]);
1.7 kristaps 1580:
1581: /* Parse til the closing '}', putting into the array. */
1.14 kristaps 1582: advance(p, pos);
1583: while (*pos < BUFSZ(p)) {
1584: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1585: advance(p, pos);
1.7 kristaps 1586: start = *pos;
1587: stack = 0;
1.14 kristaps 1588: while (*pos < BUFSZ(p)) {
1.7 kristaps 1589: /*
1590: * According to the manual, commas within
1591: * embedded commands are escaped.
1592: * We keep track of embedded-ness in the "stack"
1593: * state anyway, so this is free.
1594: */
1.14 kristaps 1595: if (',' == BUF(p)[*pos] && 0 == stack && 1 != hint)
1.7 kristaps 1596: break;
1.14 kristaps 1597: else if (0 == stack && '}' == BUF(p)[*pos])
1.7 kristaps 1598: break;
1.14 kristaps 1599: else if (0 != stack && '}' == BUF(p)[*pos])
1.7 kristaps 1600: stack--;
1.14 kristaps 1601: else if ('{' == BUF(p)[*pos])
1.7 kristaps 1602: stack++;
1.14 kristaps 1603: advance(p, pos);
1.7 kristaps 1604: }
1605: if (stack)
1606: texiwarn(p, "unterminated macro "
1607: "in macro arguments");
1.14 kristaps 1608: if ((end = *pos) == BUFSZ(p))
1.7 kristaps 1609: break;
1610: /* Test for zero-length '{ }'. */
1.14 kristaps 1611: if (start == end && '}' == BUF(p)[*pos] && 0 == *argsz)
1.7 kristaps 1612: break;
1613: /* FIXME: use reallocarray. */
1614: args = realloc
1615: (args, sizeof(char *) *
1616: (*argsz + 1));
1617: if (NULL == args)
1618: texiabort(p, NULL);
1619: args[*argsz] = malloc(end - start + 1);
1620: if (NULL == args[*argsz])
1621: texiabort(p, NULL);
1622: memcpy(args[*argsz],
1.14 kristaps 1623: &BUF(p)[start], end - start);
1.7 kristaps 1624: args[*argsz][end - start] = '\0';
1625: (*argsz)++;
1.14 kristaps 1626: if ('}' == BUF(p)[*pos])
1.7 kristaps 1627: break;
1.14 kristaps 1628: advance(p, pos);
1.7 kristaps 1629: }
1630:
1.14 kristaps 1631: if (*pos == BUFSZ(p))
1.7 kristaps 1632: texierr(p, "unterminated arguments");
1.14 kristaps 1633: assert('}' == BUF(p)[*pos]);
1634: advance(p, pos);
1.7 kristaps 1635: return(args);
1.2 kristaps 1636: }
1.20 kristaps 1637:
1638: /*
1639: * If we're printing chapters, then do some naviation here and then
1640: * close our outfile.
1641: * I want to call this the SEE ALSO section, but that's not really what
1642: * it is: we'll refer to the "initial" (top) node and the next and
1643: * previous chapters.
1644: */
1645: void
1646: teximdocclose(struct texi *p, int last)
1647: {
1648: char buf[PATH_MAX];
1649:
1650: if (NULL == p->chapters || 0 == p->chapnum)
1651: return;
1652:
1653: teximacro(p, "Sh INFO NAVIGATION");
1654:
1655: /* Print a reference to the "top" node. */
1656: if (p->chapnum > 1) {
1.22 kristaps 1657: texiputchars(p, "Top node,");
1.31 ! kristaps 1658: snprintf(buf, sizeof(buf), "%s-1 7", p->chapters);
! 1659: p->seenvs = 0;
1.20 kristaps 1660: teximacroopen(p, "Xr ");
1661: texiputchars(p, buf);
1.22 kristaps 1662: texiputchars(p, " ;");
1.20 kristaps 1663: teximacroclose(p);
1664: }
1665:
1666: /* Print a reference to the previous node. */
1667: if (p->chapnum > 2) {
1.22 kristaps 1668: texiputchars(p, "previous node,");
1.20 kristaps 1669: snprintf(buf, sizeof(buf),
1.31 ! kristaps 1670: "%s-%zu 7", p->chapters, p->chapnum - 1);
! 1671: p->seenvs = 0;
1.20 kristaps 1672: teximacroopen(p, "Xr ");
1673: texiputchars(p, buf);
1674: if ( ! last)
1.22 kristaps 1675: texiputchars(p, " ;");
1.20 kristaps 1676: teximacroclose(p);
1677: }
1678:
1679: /* Print a reference to the next node. */
1680: if ( ! last) {
1.22 kristaps 1681: if (1 == p->chapnum)
1682: texiputchars(p, "Next node,");
1683: else
1684: texiputchars(p, "next node,");
1.20 kristaps 1685: snprintf(buf, sizeof(buf),
1.31 ! kristaps 1686: "%s-%zu 7", p->chapters, p->chapnum + 1);
! 1687: p->seenvs = 0;
1.20 kristaps 1688: teximacroopen(p, "Xr ");
1689: texiputchars(p, buf);
1690: teximacroclose(p);
1691: }
1692:
1693: fclose(p->outfile);
1694: }
1695:
1696: /*
1697: * Open a mdoc(7) context.
1698: * If we're printing chapters, then open the outfile here, too.
1699: * Otherwise just print the mdoc(7) prologue.
1700: */
1701: void
1.21 kristaps 1702: teximdocopen(struct texi *p, size_t *pos)
1.20 kristaps 1703: {
1704: const char *cp;
1705: time_t t;
1706: char date[32];
1707: char fname[PATH_MAX];
1708:
1709: if (NULL != p->chapters) {
1.31 ! kristaps 1710: p->chapnum++;
! 1711: snprintf(fname, sizeof(fname), "%s-%zu",
! 1712: p->chapters, p->chapnum);
! 1713: p->nodes = realloc(p->nodes,
! 1714: (p->nodesz + 1) * sizeof(char *));
! 1715: if (NULL == p->nodes)
! 1716: texiabort(p, NULL);
! 1717: p->nodes[p->nodesz] = strdup(fname);
! 1718: if (NULL == p->nodes[p->nodesz])
! 1719: texiabort(p, NULL);
! 1720: p->nodesz++;
! 1721: snprintf(fname, sizeof(fname), "%s-%zu.7",
! 1722: p->chapters, p->chapnum);
1.20 kristaps 1723: p->outfile = fopen(fname, "w");
1724: if (NULL == p->outfile)
1725: texiabort(p, fname);
1726: }
1727:
1728: /*
1729: * Here we print our standard mdoc(7) prologue.
1730: * We use the title set with @settitle for the `Nd' description
1731: * and the source document filename (the first one as invoked on
1732: * the command line) for the title.
1733: * The date is set to the current date.
1734: */
1735: t = time(NULL);
1736: strftime(date, sizeof(date), "%F", localtime(&t));
1737:
1.30 kristaps 1738: p->seenvs = -1;
1.20 kristaps 1739: teximacroopen(p, "Dd");
1740: texiputchars(p, date);
1741: teximacroclose(p);
1742: teximacroopen(p, "Dt");
1743: for (cp = p->title; '\0' != *cp; cp++)
1744: texiputchar(p, toupper((unsigned int)*cp));
1745: texiputchars(p, " 7");
1746: teximacroclose(p);
1747: teximacro(p, "Os");
1748: teximacro(p, "Sh NAME");
1749: teximacroopen(p, "Nm");
1750: for (cp = p->title; '\0' != *cp; cp++)
1751: texiputchar(p, *cp);
1752: teximacroclose(p);
1753: teximacroopen(p, "Nd");
1.21 kristaps 1754: /*
1755: * The subtitle `Nd' can consist of arbitrary macros, so paste
1756: * it and parse to the end of the line.
1757: */
1758: if (NULL != p->subtitle) {
1759: texisplice(p, p->subtitle, strlen(p->subtitle), *pos);
1760: parseeoln(p, pos);
1761: } else
1.20 kristaps 1762: texiputchars(p, "Unknown description");
1763: teximacroclose(p);
1764: }
1765:
CVSweb