Annotation of texi2mdoc/util.c, Revision 1.30
1.30 ! kristaps 1: /* $Id: util.c,v 1.29 2015/03/07 11:53:21 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2015 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/mman.h>
18: #include <sys/stat.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <limits.h>
24: #include <stdarg.h>
25: #include <stdio.h>
26: #include <stdlib.h>
27: #include <string.h>
28: #include <time.h>
29: #include <unistd.h>
30:
31: #include "extern.h"
32:
/*
 * Names of the mdoc(7) macros, terminated by a NULL sentinel.
 * Every name MUST be exactly two or three characters long: the lookup
 * in parseword() only considers words of those lengths.
 */
static const char *const mdocs[] = {
	"Ap",  "Dd",  "Dt",  "Os",  "Sh",  "Ss",  "Pp",  "D1",
	"Dl",  "Bd",  "Ed",  "Bl",  "El",  "It",  "Ad",  "An",
	"Ar",  "Cd",  "Cm",  "Dv",  "Er",  "Ev",  "Ex",  "Fa",
	"Fd",  "Fl",  "Fn",  "Ft",  "Ic",  "In",  "Li",  "Nd",
	"Nm",  "Op",  "Ot",  "Pa",  "Rv",  "St",  "Va",  "Vt",
	"Xr",  "%A",  "%B",  "%D",  "%I",  "%J",  "%N",  "%O",
	"%P",  "%R",  "%T",  "%V",  "Ac",  "Ao",  "Aq",  "At",
	"Bc",  "Bf",  "Bo",  "Bq",  "Bsx", "Bx",  "Db",  "Dc",
	"Do",  "Dq",  "Ec",  "Ef",  "Em",  "Eo",  "Fx",  "Ms",
	"No",  "Ns",  "Nx",  "Ox",  "Pc",  "Pf",  "Po",  "Pq",
	"Qc",  "Ql",  "Qo",  "Qq",  "Re",  "Rs",  "Sc",  "So",
	"Sq",  "Sm",  "Sx",  "Sy",  "Tn",  "Ux",  "Xc",  "Xo",
	"Fo",  "Fc",  "Oo",  "Oc",  "Bk",  "Ek",  "Bt",  "Hf",
	"Fr",  "Ud",  "Lb",  "Lp",  "Lk",  "Mt",  "Brq", "Bro",
	"Brc", "%C",  "Es",  "En",  "Dx",  "%Q",  "br",  "sp",
	"%U",  "Ta",  "ll",
	NULL
};
70:
71: /*
1.1 kristaps 72: * Unmap the top-most file in the stack of files currently opened (that
73: * is, nested calls to parsefile()).
74: */
75: void
76: texifilepop(struct texi *p)
77: {
78: struct texifile *f;
79:
80: assert(p->filepos > 0);
81: f = &p->files[--p->filepos];
1.14 kristaps 82: free(f->map);
1.1 kristaps 83: }
84:
1.7 kristaps 85: static void
86: teximacrofree(struct teximacro *p)
87: {
88: size_t i;
89:
90: for (i = 0; i < p->argsz; i++)
91: free(p->args[i]);
92:
93: free(p->args);
94: free(p->key);
95: free(p->value);
96: }
97:
98: static void
99: texivaluefree(struct texivalue *p)
100: {
101:
102: free(p->key);
103: free(p->value);
104: }
105:
1.30 ! kristaps 106: static void
! 107: texidex_free(struct texidex *p)
! 108: {
! 109: size_t i;
! 110:
! 111: for (i = 0; i < p->indexsz; i++)
! 112: free(p->index[i]);
! 113:
! 114: free(p->index);
! 115: free(p->name);
! 116: p->index = NULL;
! 117: p->indexsz = 0;
! 118: }
! 119:
! 120: /*
! 121: * Add the text beginning at "index" and of "sz" bytes to the index
! 122: * named "tok" with name size "toksz".
! 123: * This will also output the necessary mdoc(7) to construct the index.
! 124: */
! 125: void
! 126: texindex(struct texi *p, const char *tok,
! 127: size_t toksz, const char *index, size_t sz)
! 128: {
! 129: size_t i;
! 130: #ifdef HAVE_INDEX
! 131: char *cp;
! 132: #endif
! 133:
! 134: if (0 == sz) {
! 135: texiwarn(p, "zero-length index entry");
! 136: return;
! 137: }
! 138:
! 139: /* Look for the index. (Must be found.) */
! 140: for (i = 0; i < p->indexsz; i++) {
! 141: if (strlen(p->indexs[i].name) != toksz)
! 142: continue;
! 143: if (strncmp(p->indexs[i].name, tok, toksz))
! 144: continue;
! 145: break;
! 146: }
! 147:
! 148: assert(i < p->indexsz);
! 149: /* Reallocate index's terms. */
! 150: p->indexs[i].index = realloc
! 151: (p->indexs[i].index,
! 152: (p->indexs[i].indexsz + 1) *
! 153: sizeof(char *));
! 154: if (NULL == p->indexs[i].index)
! 155: texiabort(p, NULL);
! 156:
! 157: /* Add term to term array. */
! 158: p->indexs[i].index[p->indexs[i].indexsz] =
! 159: malloc(sz + 1);
! 160: if (NULL == p->indexs[i].index[p->indexs[i].indexsz])
! 161: texiabort(p, NULL);
! 162: memcpy(p->indexs[i].index[p->indexs[i].indexsz],
! 163: index, sz);
! 164: p->indexs[i].index[p->indexs[i].indexsz][sz] = '\0';
! 165:
! 166: /* Output mdoc(7) for index. */
! 167: #ifdef HAVE_INDEX
! 168: p->seenvs = -1;
! 169: teximacroopen(p, "Ix");
! 170: texiputchars(p, "idx");
! 171: texiputchars(p, p->indexs[i].name);
! 172: cp = p->indexs[i].index[p->indexs[i].indexsz];
! 173: while ('\n' != *cp) {
! 174: assert('\0' != *cp);
! 175: texiputchar(p, *cp++);
! 176: }
! 177: teximacroclose(p);
! 178: #endif
! 179: p->indexs[i].indexsz++;
! 180: }
! 181:
! 182: /*
! 183: * Add an index entry named "tok" of length "sz".
! 184: * This usually consists of two letters, e.g., "cp" or "vr".
! 185: * This does nothing if the index exists or is zero-sized.
! 186: */
! 187: void
! 188: texindex_add(struct texi *p, const char *tok, size_t sz)
! 189: {
! 190: size_t i;
! 191: char *cp;
! 192:
! 193: if (0 == sz)
! 194: return;
! 195:
! 196: /* Make sure we don't have a duplicate. */
! 197: for (i = 0; i < p->indexsz; i++) {
! 198: if (strlen(p->indexs[i].name) != sz)
! 199: continue;
! 200: if (strncmp(p->indexs[i].name, tok, sz))
! 201: continue;
! 202: return;
! 203: }
! 204:
! 205: /* Reallocate indices. */
! 206: p->indexs = realloc(p->indexs,
! 207: sizeof(struct texidex) * (p->indexsz + 1));
! 208: if (NULL == p->indexs)
! 209: texiabort(p, NULL);
! 210: if (NULL == (cp = malloc(sz + 1)))
! 211: texiabort(p, NULL);
! 212: memcpy(cp, tok, sz);
! 213: cp[sz] = '\0';
! 214: p->indexs[p->indexsz].name = cp;
! 215: p->indexs[p->indexsz].index = NULL;
! 216: p->indexs[p->indexsz].indexsz = 0;
! 217: p->indexsz++;
! 218: }
! 219:
1.1 kristaps 220: /*
221: * Unmap all files that we're currently using and free all resources
222: * that we've allocated during the parse.
223: * The utility should exit(...) after this is called.
224: */
225: void
226: texiexit(struct texi *p)
227: {
228: size_t i;
229:
230: /* Make sure we're newline-terminated. */
231: if (p->outcol)
1.20 kristaps 232: fputc('\n', p->outfile);
233: if (NULL != p->chapters)
234: teximdocclose(p, 1);
1.1 kristaps 235:
236: /* Unmap all files. */
237: while (p->filepos > 0)
238: texifilepop(p);
239:
1.7 kristaps 240: for (i = 0; i < p->macrosz; i++)
241: teximacrofree(&p->macros[i]);
1.1 kristaps 242: for (i = 0; i < p->dirsz; i++)
243: free(p->dirs[i]);
1.4 kristaps 244: for (i = 0; i < p->indexsz; i++)
1.30 ! kristaps 245: texidex_free(&p->indexs[i]);
1.7 kristaps 246: for (i = 0; i < p->valsz; i++)
247: texivaluefree(&p->vals[i]);
1.4 kristaps 248:
1.7 kristaps 249: free(p->macros);
1.1 kristaps 250: free(p->vals);
1.4 kristaps 251: free(p->indexs);
1.1 kristaps 252: free(p->dirs);
253: free(p->subtitle);
254: free(p->title);
1.26 kristaps 255: free(p->copying);
1.1 kristaps 256: }
257:
/*
 * Fatal system error: report it, clean up via texiexit(), and exit
 * with EXIT_FAILURE.
 * The "errstring" is handed to perror(3) as-is and may be NULL.
 * This function does not return.
 */
void
texiabort(struct texi *p, const char *errstring)
{

	perror(errstring);
	texiexit(p);
	exit(EXIT_FAILURE);
}
270:
271: /*
272: * Print a generic warning message (to stderr) tied to our current
273: * location in the parse sequence.
274: */
275: void
276: texiwarn(const struct texi *p, const char *fmt, ...)
277: {
1.15 kristaps 278: va_list ap;
279: const struct texifile *f;
280:
281: f = &p->files[p->filepos - 1];
282:
283: if (f->insplice)
284: fprintf(stderr, "%s:%zu:%zu (%zuB left in splice): "
285: "warning: ", f->name, f->line + 1,
286: f->col + 1, f->insplice);
287: else
288: fprintf(stderr, "%s:%zu:%zu: warning: ",
289: f->name, f->line + 1, f->col + 1);
1.1 kristaps 290:
291: va_start(ap, fmt);
292: vfprintf(stderr, fmt, ap);
293: va_end(ap);
294: fputc('\n', stderr);
295: }
296:
297: /*
298: * Print an error message (to stderr) tied to our current location in
299: * the parse sequence, invoke texiexit(), then die.
300: */
301: void
302: texierr(struct texi *p, const char *fmt, ...)
303: {
1.15 kristaps 304: va_list ap;
305: struct texifile *f;
306:
307: f = &p->files[p->filepos - 1];
308:
309: if (f->insplice)
310: fprintf(stderr, "%s:%zu:%zu: (%zuB left in splice): "
311: "error: ", f->name, f->line + 1,
312: f->col + 1, f->insplice);
313: else
314: fprintf(stderr, "%s:%zu:%zu: error: ",
315: f->name, f->line + 1, f->col + 1);
1.1 kristaps 316:
317: va_start(ap, fmt);
318: vfprintf(stderr, fmt, ap);
319: va_end(ap);
320: fputc('\n', stderr);
321: texiexit(p);
322: exit(EXIT_FAILURE);
323: }
324:
325: /*
326: * Put a single data character to the output if we're not ignoring.
1.13 kristaps 327: * Escape starting a line with a control character and slashes.
1.1 kristaps 328: */
329: void
330: texiputchar(struct texi *p, char c)
331: {
332:
333: if (p->ign)
334: return;
335: if ('.' == c && 0 == p->outcol)
1.20 kristaps 336: fputs("\\&", p->outfile);
1.10 kristaps 337: if ('\'' == c && 0 == p->outcol)
1.20 kristaps 338: fputs("\\&", p->outfile);
1.1 kristaps 339:
1.23 kristaps 340: if (p->uppercase)
341: fputc(toupper((unsigned int)c), p->outfile);
342: else
343: fputc(c, p->outfile);
1.13 kristaps 344: if ('\\' == c)
1.20 kristaps 345: fputc('e', p->outfile);
1.1 kristaps 346: if ('\n' == c) {
347: p->outcol = 0;
348: p->seenws = 0;
349: } else
350: p->outcol++;
351: }
352:
353: /*
1.13 kristaps 354: * Put an opaque series of characters.
355: * Characters starting a line with a control character are escaped, but
356: * that's it, so don't use this for non-controlled sequences of text.
1.1 kristaps 357: */
358: void
359: texiputchars(struct texi *p, const char *s)
360: {
361:
1.13 kristaps 362: if (p->ign)
363: return;
364: if ('.' == *s && 0 == p->outcol)
1.20 kristaps 365: fputs("\\&", p->outfile);
1.13 kristaps 366: if ('\'' == *s && 0 == p->outcol)
1.20 kristaps 367: fputs("\\&", p->outfile);
1.23 kristaps 368: if (p->uppercase)
369: for ( ; '\0' != *s; s++)
370: p->outcol += fputc(toupper
371: ((unsigned int)*s), p->outfile);
372: else
373: p->outcol += fputs(s, p->outfile);
1.9 kristaps 374: }
375:
376: /*
377: * This puts all characters onto the output stream but makes sure to
378: * escape mdoc(7) slashes.
1.14 kristaps 379: * FIXME: useless.
1.9 kristaps 380: */
381: void
1.14 kristaps 382: texiputbuf(struct texi *p, size_t start, size_t end)
1.9 kristaps 383: {
384:
1.14 kristaps 385: for ( ; start < end; start++)
386: texiputchar(p, BUF(p)[start]);
1.1 kristaps 387: }
388:
389: /*
390: * Close an mdoc(7) macro opened with teximacroopen().
391: * If there are no more macros on the line, prints a newline.
392: */
393: void
394: teximacroclose(struct texi *p)
395: {
396:
1.30 ! kristaps 397: if (p->ign || p->literal|| TEXILIST_TABLE == p->list)
1.1 kristaps 398: return;
399:
400: if (0 == --p->outmacro) {
1.20 kristaps 401: fputc('\n', p->outfile);
1.1 kristaps 402: p->outcol = p->seenws = 0;
403: }
404: }
405:
406: /*
407: * Open a mdoc(7) macro.
408: * This is used for line macros, e.g., Qq [foo bar baz].
409: * It can be invoked for nested macros, e.g., Qq Li foo .
410: * TODO: flush-right punctuation (e.g., parenthesis).
411: */
412: void
413: teximacroopen(struct texi *p, const char *s)
414: {
415: int rc;
416:
1.30 ! kristaps 417: if (p->ign || p->literal|| TEXILIST_TABLE == p->list)
1.1 kristaps 418: return;
419:
420: if (p->outcol && 0 == p->outmacro) {
1.20 kristaps 421: fputc('\n', p->outfile);
1.1 kristaps 422: p->outcol = 0;
423: }
424:
1.30 ! kristaps 425: if (p->seenvs > 0 && 0 == p->outmacro)
! 426: fputs(".Pp\n", p->outfile);
! 427:
1.1 kristaps 428: if (0 == p->outmacro)
1.20 kristaps 429: fputc('.', p->outfile);
1.1 kristaps 430: else
1.20 kristaps 431: fputc(' ', p->outfile);
1.1 kristaps 432:
1.20 kristaps 433: if (EOF != (rc = fputs(s, p->outfile)))
1.1 kristaps 434: p->outcol += rc;
435:
1.20 kristaps 436: fputc(' ', p->outfile);
1.1 kristaps 437: p->outcol++;
438: p->outmacro++;
1.30 ! kristaps 439: p->seenws = p->seenvs = 0;
1.1 kristaps 440: }
441:
442: /*
443: * Put a stadnalone mdoc(7) command with the trailing newline.
444: */
445: void
446: teximacro(struct texi *p, const char *s)
447: {
448:
449: if (p->ign)
450: return;
451:
452: if (p->outmacro)
453: texierr(p, "\"%s\" in open line scope!?", s);
454: if (p->literal)
455: texierr(p, "\"%s\" in a literal scope!?", s);
456: if (p->outcol)
1.20 kristaps 457: fputc('\n', p->outfile);
1.30 ! kristaps 458: if (p->seenvs > 0)
! 459: fputs(".Pp\n", p->outfile);
1.1 kristaps 460:
1.20 kristaps 461: fputc('.', p->outfile);
462: fputs(s, p->outfile);
463: fputc('\n', p->outfile);
1.1 kristaps 464: p->outcol = p->seenws = 0;
465: }
466:
467: /*
468: * Introduce vertical space during normal (non-macro) input.
469: */
470: void
471: texivspace(struct texi *p)
472: {
473:
1.30 ! kristaps 474: if (TEXILIST_TABLE != p->list && p->seenvs >= 0)
! 475: p->seenvs = 1;
1.1 kristaps 476: }
477:
478: /*
479: * Advance by a single byte in the input stream, adjusting our location
480: * in the current input file.
481: */
482: void
1.14 kristaps 483: advance(struct texi *p, size_t *pos)
1.1 kristaps 484: {
1.15 kristaps 485: struct texifile *f;
1.1 kristaps 486:
1.15 kristaps 487: f = &p->files[p->filepos - 1];
488:
489: if (0 == f->insplice) {
490: if ('\n' == BUF(p)[*pos]) {
491: f->line++;
492: f->col = 0;
493: } else
494: f->col++;
1.17 kristaps 495: } else {
1.15 kristaps 496: --f->insplice;
1.17 kristaps 497: if (0 == f->insplice)
498: f->depth = 0;
499: }
1.1 kristaps 500:
501: (*pos)++;
502: }
503:
504: /*
505: * It's common to wait punctuation to float on the right side of macro
506: * lines in mdoc(7), e.g., ".Em hello ) ."
507: * This function does so, and should be called before teximacroclose().
508: * It will detect that it's the last in the nested macros and
509: * appropriately flush-left punctuation alongside the macro.
510: */
511: void
1.14 kristaps 512: texipunctuate(struct texi *p, size_t *pos)
1.1 kristaps 513: {
514: size_t start, end;
515:
516: if (1 != p->outmacro)
517: return;
518:
1.14 kristaps 519: for (start = end = *pos; end < BUFSZ(p); end++) {
520: switch (BUF(p)[end]) {
1.1 kristaps 521: case (','):
522: case (')'):
523: case ('.'):
524: case ('"'):
525: case (':'):
1.22 kristaps 526: case (';'):
1.1 kristaps 527: case ('!'):
528: case ('?'):
529: continue;
530: default:
531: break;
532: }
533: break;
534: }
535: if (end == *pos)
536: return;
1.14 kristaps 537: if (end + 1 == BUFSZ(p) || ' ' == BUF(p)[end] ||
1.30 ! kristaps 538: '@' == BUF(p)[end] || '\n' == BUF(p)[end]) {
1.1 kristaps 539: for ( ; start < end; start++) {
540: texiputchar(p, ' ');
1.14 kristaps 541: texiputchar(p, BUF(p)[start]);
542: advance(p, pos);
1.1 kristaps 543: }
544: }
545: }
546:
547: /*
548: * Advance to the next non-whitespace word in the input stream.
549: * If we're in literal mode, then print all of the whitespace as we're
550: * doing so.
551: */
552: static size_t
1.14 kristaps 553: advancenext(struct texi *p, size_t *pos)
1.1 kristaps 554: {
555:
556: if (p->literal) {
1.14 kristaps 557: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos])) {
558: texiputchar(p, BUF(p)[*pos]);
559: advance(p, pos);
1.1 kristaps 560: }
561: return(*pos);
562: }
563:
1.14 kristaps 564: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos])) {
1.1 kristaps 565: p->seenws = 1;
1.30 ! kristaps 566: if (0 == p->seenvs && '\n' == BUF(p)[*pos])
! 567: if (*pos + 1 < BUFSZ(p) && '\n' == BUF(p)[*pos + 1])
! 568: p->seenvs = 1;
1.14 kristaps 569: advance(p, pos);
1.1 kristaps 570: }
571: return(*pos);
572: }
573:
574: /*
575: * Advance to the EOLN in the input stream.
1.22 kristaps 576: * This will skip over '@' markers in an effort to ignore escaped
577: * newlines.
1.1 kristaps 578: */
579: size_t
1.14 kristaps 580: advanceeoln(struct texi *p, size_t *pos, int consumenl)
1.1 kristaps 581: {
582:
1.22 kristaps 583: while (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos]) {
584: if ('@' == BUF(p)[*pos])
585: advance(p, pos);
1.14 kristaps 586: advance(p, pos);
1.22 kristaps 587: }
1.14 kristaps 588: if (*pos < BUFSZ(p) && consumenl)
589: advance(p, pos);
1.1 kristaps 590: return(*pos);
591: }
592:
/*
 * Advance byte by byte, keeping the file location up to date, until
 * reaching "end", an absolute position in the current buffer that must
 * not lie before the current position.
 */
void
advanceto(struct texi *p, size_t *pos, size_t end)
{

	assert(end >= *pos);
	for ( ; *pos < end; )
		advance(p, pos);
}
605:
1.7 kristaps 606: static void
1.17 kristaps 607: texiexecmacro(struct texi *p, struct teximacro *m, size_t sv, size_t *pos)
1.7 kristaps 608: {
1.11 kristaps 609: size_t valsz, realsz, aasz, asz,
610: ssz, i, j, k, start, end;
611: char *val;
612: char **args;
613: const char *cp;
1.7 kristaps 614:
1.17 kristaps 615: /* Disregard empty macros. */
1.22 kristaps 616: if (0 == (valsz = realsz = strlen(m->value))) {
617: args = argparse(p, pos, &asz, m->argsz);
618: for (i = 0; i < asz; i++)
619: free(args[i]);
620: free(args);
1.17 kristaps 621: return;
1.22 kristaps 622: }
1.17 kristaps 623:
624: /*
625: * This is important: it protect us from macros that invoke more
626: * macros, possibly going on infinitely.
627: * We use "sv" instead of the current position because we might
628: * be invoked at the end of the macro (i.e., insplice == 0).
629: * The "sv" value was initialised at the start of the macro.
630: */
631: if (sv > 0)
1.24 kristaps 632: if (++p->files[p->filepos - 1].depth > 64)
1.17 kristaps 633: texierr(p, "maximium recursive depth");
634:
1.14 kristaps 635: args = argparse(p, pos, &asz, m->argsz);
1.7 kristaps 636: if (asz != m->argsz)
637: texiwarn(p, "invalid macro argument length");
638: aasz = asz < m->argsz ? asz : m->argsz;
639:
640: if (0 == aasz) {
1.21 kristaps 641: texisplice(p, m->value, valsz, *pos);
1.7 kristaps 642: return;
643: }
644:
645: val = strdup(m->value);
646:
647: for (i = j = 0; i < realsz; i++) {
648: /* Parse blindly til the backslash delimiter. */
649: if ('\\' != m->value[i]) {
650: val[j++] = m->value[i];
651: val[j] = '\0';
652: continue;
653: } else if (i == realsz - 1)
654: texierr(p, "trailing argument name delimiter");
655:
656: /* Double-backslash is escaped. */
657: if ('\\' == m->value[i + 1]) {
658: val[j++] = m->value[i++];
659: val[j] = '\0';
660: continue;
661: }
662:
663: assert('\\' == m->value[i] && i < realsz - 1);
664:
665: /* Parse to terminating delimiter. */
666: /* FIXME: embedded, escaped delimiters? */
667: for (start = end = i + 1; end < realsz; end++)
668: if ('\\' == m->value[end])
669: break;
670: if (end == realsz)
671: texierr(p, "unterminated argument name");
672:
673: for (k = 0; k < aasz; k++) {
674: if ((ssz = strlen(m->args[k])) != (end - start))
675: continue;
676: if (strncmp(&m->value[start], m->args[k], ssz))
677: continue;
678: break;
679: }
680:
681: /*
682: * Argument didn't exist in argument table.
1.14 kristaps 683: * Just ignore it.
1.7 kristaps 684: */
685: if (k == aasz) {
1.14 kristaps 686: i = end;
1.7 kristaps 687: continue;
688: }
689:
690: if (strlen(args[k]) > ssz) {
691: valsz += strlen(args[k]);
692: val = realloc(val, valsz + 1);
693: if (NULL == val)
694: texiabort(p, NULL);
695: }
696:
1.11 kristaps 697: for (cp = args[k]; '\0' != *cp; cp++)
698: val[j++] = *cp;
699:
700: val[j] = '\0';
1.7 kristaps 701: i = end;
702: }
703:
1.21 kristaps 704: texisplice(p, val, strlen(val), *pos);
1.7 kristaps 705:
706: for (i = 0; i < asz; i++)
707: free(args[i]);
708: free(args);
709: free(val);
710: }
711:
1.1 kristaps 712: /*
713: * Output a free-form word in the input stream, progressing to the next
714: * command or white-space.
715: * This also will advance the input stream.
716: */
717: static void
1.14 kristaps 718: parseword(struct texi *p, size_t *pos, char extra)
1.1 kristaps 719: {
1.29 kristaps 720: size_t i, end, len;
721: int c;
1.1 kristaps 722:
1.25 kristaps 723: /*
1.27 kristaps 724: * If a prior word had a terminating double-newline, then begin
725: * this text block with a `Pp'.
726: * We don't do this if we're in a literal context (we'll print
727: * out the newlines themselves) nor in a `TS' table.
728: */
1.30 ! kristaps 729: if (p->seenvs > 0 && 0 == p->literal && TEXILIST_TABLE != p->list) {
! 730: if (p->outcol > 0)
! 731: fputc('\n', p->outfile);
! 732: fputs(".Pp\n", p->outfile);
! 733: p->outcol = 0;
! 734: }
1.27 kristaps 735:
736: /*
1.25 kristaps 737: * Some line control: if we (non-macro, non-literal) already
738: * have more than 72 characters written to the screen, then
739: * output a newline before getting started.
740: */
1.1 kristaps 741: if (p->seenws && 0 == p->outmacro &&
742: p->outcol > 72 && 0 == p->literal)
743: texiputchar(p, '\n');
1.25 kristaps 744:
745: /* Usual padding in the case of seen whitespace. */
1.1 kristaps 746: if (p->seenws && p->outcol && 0 == p->literal)
747: texiputchar(p, ' ');
748:
749: p->seenws = 0;
1.29 kristaps 750:
751: /*
752: * If we're in a macro line, we might want to print text that
753: * happens to be the same as an mdoc(7) macro.
754: * Obviously, we need to escape these words.
755: */
756: if (p->outmacro) {
757: end = *pos;
758: /* Read ahead to get the word length. */
759: while (end < BUFSZ(p) && ! ismspace(BUF(p)[end])) {
760: switch ((c = BUF(p)[end])) {
761: case ('@'):
762: case ('}'):
763: case ('{'):
764: break;
765: default:
766: if ('\0' != extra && extra == c)
767: break;
768: end++;
769: continue;
770: }
771: break;
772: }
773: len = end - *pos;
774: /* See if we have a match. */
775: for (i = 0; NULL != mdocs[i]; i++) {
776: /* All macros are 2 or three letters. */
777: if (len < 2 || len > 3)
778: continue;
779: /* Check the macro word length. */
780: if ('\0' == mdocs[i][2] && 2 != len)
781: continue;
782: else if ('\0' == mdocs[i][3] && 3 != len)
783: continue;
784: if (strncmp(mdocs[i], &BUF(p)[*pos], len))
785: continue;
786: texiputchars(p, "\\&");
787: break;
788: }
789: }
1.1 kristaps 790:
1.14 kristaps 791: while (*pos < BUFSZ(p) && ! ismspace(BUF(p)[*pos])) {
792: switch (BUF(p)[*pos]) {
1.1 kristaps 793: case ('@'):
794: case ('}'):
795: case ('{'):
796: return;
797: }
1.14 kristaps 798: if ('\0' != extra && BUF(p)[*pos] == extra)
1.1 kristaps 799: return;
1.28 kristaps 800:
801: if (p->literal) {
802: texiputchar(p, BUF(p)[*pos]);
803: advance(p, pos);
804: continue;
805: }
806:
1.30 ! kristaps 807: if ('"' == BUF(p)[*pos]) {
! 808: texiputchars(p, "\\(dq");
! 809: } else if (*pos < BUFSZ(p) - 2 &&
1.28 kristaps 810: '-' == BUF(p)[*pos] &&
811: '-' == BUF(p)[*pos + 1] &&
812: '-' == BUF(p)[*pos + 2]) {
813: texiputchars(p, "\\(em");
814: advance(p, pos);
815: advance(p, pos);
816: } else if (*pos < BUFSZ(p) - 1 &&
817: '-' == BUF(p)[*pos] &&
818: '-' == BUF(p)[*pos + 1]) {
819: texiputchars(p, "\\(en");
820: advance(p, pos);
821: } else if (*pos < BUFSZ(p) - 1 &&
1.14 kristaps 822: '`' == BUF(p)[*pos] &&
823: '`' == BUF(p)[*pos + 1]) {
1.1 kristaps 824: texiputchars(p, "\\(lq");
1.14 kristaps 825: advance(p, pos);
826: } else if (*pos < BUFSZ(p) - 1 &&
827: '\'' == BUF(p)[*pos] &&
828: '\'' == BUF(p)[*pos + 1]) {
1.1 kristaps 829: texiputchars(p, "\\(rq");
1.14 kristaps 830: advance(p, pos);
1.1 kristaps 831: } else
1.14 kristaps 832: texiputchar(p, BUF(p)[*pos]);
1.28 kristaps 833:
1.14 kristaps 834: advance(p, pos);
1.1 kristaps 835: }
1.25 kristaps 836:
837: /*
838: * New sentence, new line:if we (non-macro, non-literal) see a
839: * period at the end of the last printed word, then open a
840: * newline.
841: */
1.30 ! kristaps 842: if (0 == p->literal && 0 == p->outmacro && *pos < BUFSZ(p))
! 843: switch (BUF(p)[*pos - 1]) {
! 844: case ('.'):
! 845: case ('!'):
! 846: case ('?'):
! 847: texiputchar(p, '\n');
! 848: break;
! 849: default:
! 850: break;
! 851: }
! 852:
! 853: p->seenvs = 0;
1.1 kristaps 854: }
855:
856: /*
857: * Look up the command at position "pos" in the buffer, returning it (or
858: * TEXICMD__MAX if none found) and setting "end" to be the absolute
859: * index after the command name.
860: */
861: enum texicmd
1.19 kristaps 862: texicmd(const struct texi *p, size_t pos, size_t *end, struct teximacro **macro)
1.1 kristaps 863: {
1.4 kristaps 864: size_t i, len, toksz;
1.1 kristaps 865:
1.14 kristaps 866: assert('@' == BUF(p)[pos]);
1.1 kristaps 867:
1.7 kristaps 868: if (NULL != macro)
869: *macro = NULL;
870:
1.14 kristaps 871: if ((*end = pos) == BUFSZ(p))
1.1 kristaps 872: return(TEXICMD__MAX);
1.14 kristaps 873: else if ((*end = ++pos) == BUFSZ(p))
1.1 kristaps 874: return(TEXICMD__MAX);
875:
876: /* Alphabetic commands are special. */
1.23 kristaps 877: if ( ! isalpha((unsigned int)BUF(p)[pos])) {
1.14 kristaps 878: if ((*end = pos + 1) == BUFSZ(p))
1.1 kristaps 879: return(TEXICMD__MAX);
880: for (i = 0; i < TEXICMD__MAX; i++) {
881: if (1 != texitoks[i].len)
882: continue;
1.14 kristaps 883: if (0 == strncmp(texitoks[i].tok, &BUF(p)[pos], 1))
1.1 kristaps 884: return(i);
885: }
1.14 kristaps 886: texiwarn(p, "bad command: @%c", BUF(p)[pos]);
1.1 kristaps 887: return(TEXICMD__MAX);
888: }
889:
1.4 kristaps 890: /* Scan to the end of the possible command name. */
1.14 kristaps 891: for (*end = pos; *end < BUFSZ(p) && ! ismspace(BUF(p)[*end]); (*end)++)
892: if ((*end > pos && ('@' == BUF(p)[*end] ||
893: '{' == BUF(p)[*end] || '}' == BUF(p)[*end])))
1.1 kristaps 894: break;
895:
1.4 kristaps 896: /* Look for the command. */
1.1 kristaps 897: len = *end - pos;
898: for (i = 0; i < TEXICMD__MAX; i++) {
899: if (len != texitoks[i].len)
900: continue;
1.14 kristaps 901: if (0 == strncmp(texitoks[i].tok, &BUF(p)[pos], len))
1.1 kristaps 902: return(i);
903: }
904:
1.4 kristaps 905: /* Look for it in our indices. */
906: for (i = 0; i < p->indexsz; i++) {
1.30 ! kristaps 907: toksz = strlen(p->indexs[i].name);
1.4 kristaps 908: if (len != 5 + toksz)
909: continue;
1.30 ! kristaps 910: if (strncmp(&BUF(p)[pos], p->indexs[i].name, toksz))
1.4 kristaps 911: continue;
1.14 kristaps 912: if (0 == strncmp(&BUF(p)[pos + toksz], "index", 5))
1.7 kristaps 913: return(TEXICMD_USER_INDEX);
914: }
915:
916: for (i = 0; i < p->macrosz; i++) {
917: if (len != strlen(p->macros[i].key))
918: continue;
1.14 kristaps 919: if (strncmp(&BUF(p)[pos], p->macros[i].key, len))
1.7 kristaps 920: continue;
921: if (NULL != macro)
922: *macro = &p->macros[i];
923: return(TEXICMD__MAX);
1.4 kristaps 924: }
925:
1.14 kristaps 926: texiwarn(p, "bad command: @%.*s", (int)len, &BUF(p)[pos]);
1.1 kristaps 927: return(TEXICMD__MAX);
928: }
929:
930: /*
931: * Parse an argument from a bracketed command, e.g., @url{foo, baz}.
932: * Num should be set to the argument we're currently parsing, although
933: * it suffixes for it to be zero or non-zero.
934: * This will return 1 if there are more arguments, 0 otherwise.
935: * This will stop (returning 0) in the event of EOF or if we're not at a
936: * bracket for the zeroth parse.
937: */
938: int
1.14 kristaps 939: parsearg(struct texi *p, size_t *pos, size_t num)
1.1 kristaps 940: {
1.17 kristaps 941: size_t end, sv;
1.7 kristaps 942: enum texicmd cmd;
943: struct teximacro *macro;
1.1 kristaps 944:
1.14 kristaps 945: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos]))
946: advance(p, pos);
947: if (*pos == BUFSZ(p) || (0 == num && '{' != BUF(p)[*pos]))
1.1 kristaps 948: return(0);
949: if (0 == num)
1.14 kristaps 950: advance(p, pos);
1.1 kristaps 951:
1.14 kristaps 952: while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
953: switch (BUF(p)[*pos]) {
1.1 kristaps 954: case (','):
1.14 kristaps 955: advance(p, pos);
1.1 kristaps 956: return(1);
957: case ('}'):
1.14 kristaps 958: advance(p, pos);
1.1 kristaps 959: return(0);
960: case ('{'):
961: if (0 == p->ign)
962: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 963: advance(p, pos);
1.1 kristaps 964: continue;
965: case ('@'):
966: break;
967: default:
1.14 kristaps 968: parseword(p, pos, ',');
1.1 kristaps 969: continue;
970: }
971:
1.17 kristaps 972: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 973: cmd = texicmd(p, *pos, &end, ¯o);
974: advanceto(p, pos, end);
1.7 kristaps 975: if (NULL != macro)
1.17 kristaps 976: texiexecmacro(p, macro, sv, pos);
1.1 kristaps 977: if (TEXICMD__MAX == cmd)
978: continue;
979: if (NULL != texitoks[cmd].fp)
1.14 kristaps 980: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 981: }
982: return(0);
983: }
984:
985: /*
986: * Parse until the end of a bracketed statement, e.g., @foo{bar baz}.
987: * This will stop in the event of EOF or if we're not at a bracket.
988: */
989: void
1.18 kristaps 990: parsebracket(struct texi *p, size_t *pos, int dostack)
1.1 kristaps 991: {
1.18 kristaps 992: size_t end, sv, stack;
1.7 kristaps 993: enum texicmd cmd;
994: struct teximacro *macro;
1.1 kristaps 995:
1.14 kristaps 996: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos]))
997: advance(p, pos);
1.1 kristaps 998:
1.14 kristaps 999: if (*pos == BUFSZ(p) || '{' != BUF(p)[*pos])
1.1 kristaps 1000: return;
1.14 kristaps 1001: advance(p, pos);
1.1 kristaps 1002:
1.18 kristaps 1003: stack = 0;
1.14 kristaps 1004: while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
1005: switch (BUF(p)[*pos]) {
1.1 kristaps 1006: case ('}'):
1.18 kristaps 1007: if (stack > 0) {
1008: stack--;
1009: advance(p, pos);
1010: texiputchar(p, '}');
1011: continue;
1012: }
1.14 kristaps 1013: advance(p, pos);
1.1 kristaps 1014: return;
1015: case ('{'):
1.18 kristaps 1016: if (dostack) {
1017: stack++;
1018: advance(p, pos);
1019: texiputchar(p, '{');
1020: continue;
1021: }
1.1 kristaps 1022: if (0 == p->ign)
1023: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1024: advance(p, pos);
1.1 kristaps 1025: continue;
1026: case ('@'):
1027: break;
1028: default:
1.14 kristaps 1029: parseword(p, pos, '\0');
1.1 kristaps 1030: continue;
1031: }
1032:
1.17 kristaps 1033: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1034: cmd = texicmd(p, *pos, &end, ¯o);
1035: advanceto(p, pos, end);
1.7 kristaps 1036: if (NULL != macro)
1.17 kristaps 1037: texiexecmacro(p, macro, sv, pos);
1.1 kristaps 1038: if (TEXICMD__MAX == cmd)
1039: continue;
1040: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1041: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1042: }
1043: }
1044:
1045: /*
1046: * This should be invoked when we're on a macro line and want to process
1047: * to the end of the current input line, doing all of our macros along
1048: * the way.
1049: */
1050: void
1.14 kristaps 1051: parseeoln(struct texi *p, size_t *pos)
1.1 kristaps 1052: {
1.17 kristaps 1053: size_t end, sv;
1.7 kristaps 1054: enum texicmd cmd;
1055: struct teximacro *macro;
1.1 kristaps 1056:
1.14 kristaps 1057: while (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos]) {
1058: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) {
1.1 kristaps 1059: p->seenws = 1;
1060: if (p->literal)
1.14 kristaps 1061: texiputchar(p, BUF(p)[*pos]);
1062: advance(p, pos);
1.1 kristaps 1063: }
1.14 kristaps 1064: switch (BUF(p)[*pos]) {
1.1 kristaps 1065: case ('}'):
1066: if (0 == p->ign)
1067: texiwarn(p, "unexpected \"}\"");
1.14 kristaps 1068: advance(p, pos);
1.1 kristaps 1069: continue;
1070: case ('{'):
1071: if (0 == p->ign)
1072: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1073: advance(p, pos);
1.1 kristaps 1074: continue;
1.30 ! kristaps 1075: case ('\n'):
! 1076: continue;
1.1 kristaps 1077: case ('@'):
1078: break;
1079: default:
1.14 kristaps 1080: parseword(p, pos, '\0');
1.1 kristaps 1081: continue;
1082: }
1083:
1.17 kristaps 1084: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1085: cmd = texicmd(p, *pos, &end, ¯o);
1086: advanceto(p, pos, end);
1.7 kristaps 1087: if (NULL != macro)
1.17 kristaps 1088: texiexecmacro(p, macro, sv, pos);
1.1 kristaps 1089: if (TEXICMD__MAX == cmd)
1090: continue;
1091: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1092: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1093: }
1.14 kristaps 1094:
1095: if (*pos < BUFSZ(p) && '\n' == BUF(p)[*pos])
1096: advance(p, pos);
1.19 kristaps 1097: }
1098:
1.30 ! kristaps 1099: enum texicmd
! 1100: peeklinecmd(const struct texi *p, size_t pos)
! 1101: {
! 1102: size_t end;
! 1103:
! 1104: while (pos < BUFSZ(p) && isws(BUF(p)[pos]))
! 1105: pos++;
! 1106: if (pos == BUFSZ(p) || '@' != BUF(p)[pos])
! 1107: return(TEXICMD__MAX);
! 1108: return(texicmd(p, pos, &end, NULL));
! 1109: }
! 1110:
1.19 kristaps 1111: /*
1112: * Peek to see if there's a command after subsequent whitespace.
1113: * If so, return the macro identifier.
1114: * This DOES NOT work with user-defined macros.
1115: */
1116: enum texicmd
1117: peekcmd(const struct texi *p, size_t pos)
1118: {
1119: size_t end;
1120:
1121: while (pos < BUFSZ(p) && ismspace(BUF(p)[pos]))
1122: pos++;
1123: if (pos == BUFSZ(p) || '@' != BUF(p)[pos])
1124: return(TEXICMD__MAX);
1125: return(texicmd(p, pos, &end, NULL));
1.1 kristaps 1126: }
1127:
1128: /*
1129: * Parse a single word or command.
1130: * This will return immediately at the EOF.
1131: */
1.14 kristaps 1132: static void
1133: parsesingle(struct texi *p, size_t *pos)
1.1 kristaps 1134: {
1.17 kristaps 1135: size_t end, sv;
1.7 kristaps 1136: enum texicmd cmd;
1137: struct teximacro *macro;
1.1 kristaps 1138:
1.14 kristaps 1139: if ((*pos = advancenext(p, pos)) >= BUFSZ(p))
1.1 kristaps 1140: return;
1141:
1.14 kristaps 1142: switch (BUF(p)[*pos]) {
1.1 kristaps 1143: case ('}'):
1144: if (0 == p->ign)
1145: texiwarn(p, "unexpected \"}\"");
1.14 kristaps 1146: advance(p, pos);
1.1 kristaps 1147: return;
1148: case ('{'):
1149: if (0 == p->ign)
1150: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1151: advance(p, pos);
1.1 kristaps 1152: return;
1153: case ('@'):
1154: break;
1155: default:
1.14 kristaps 1156: parseword(p, pos, '\0');
1.1 kristaps 1157: return;
1158: }
1159:
1.17 kristaps 1160: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1161: cmd = texicmd(p, *pos, &end, ¯o);
1162: advanceto(p, pos, end);
1.7 kristaps 1163: if (NULL != macro)
1.17 kristaps 1164: texiexecmacro(p, macro, sv, pos);
1.1 kristaps 1165: if (TEXICMD__MAX == cmd)
1166: return;
1167: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1168: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1169: }
1170:
1171: /*
1172: * This is used in the @deffn type of command.
1173: * These have an arbitrary number of line arguments; however, these
1174: * arguments may or may not be surrounded by brackets.
1175: * In this function, we parse each one as either a bracketed or
1176: * non-bracketed argument, returning 0 when we've reached the end of
1177: * line or 1 otherwise.
1178: */
1179: int
1.14 kristaps 1180: parselinearg(struct texi *p, size_t *pos)
1.1 kristaps 1181: {
1182:
1.14 kristaps 1183: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) {
1.1 kristaps 1184: p->seenws = 1;
1.14 kristaps 1185: advance(p, pos);
1.1 kristaps 1186: }
1187:
1.14 kristaps 1188: if (*pos < BUFSZ(p) && '{' == BUF(p)[*pos])
1.18 kristaps 1189: parsebracket(p, pos, 0);
1.14 kristaps 1190: else if (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos])
1191: parsesingle(p, pos);
1.1 kristaps 1192: else
1193: return(0);
1194:
1195: return(1);
1196: }
1197:
/*
 * Parse from the start of the current buffer until its end, one word
 * or command at a time.
 */
static void
parseeof(struct texi *p)
{
	size_t	 cur;

	cur = 0;
	while (cur < BUFSZ(p))
		parsesingle(p, &cur);
}
1209:
1.8 kristaps 1210: void
1.21 kristaps 1211: texisplice(struct texi *p, const char *buf, size_t sz, size_t pos)
1.8 kristaps 1212: {
1.14 kristaps 1213: char *cp;
1214: struct texifile *f;
1.8 kristaps 1215:
1.14 kristaps 1216: assert(p->filepos > 0);
1217: f = &p->files[p->filepos - 1];
1.8 kristaps 1218:
1.14 kristaps 1219: if (f->mapsz + sz > f->mapmaxsz) {
1220: f->mapmaxsz = f->mapsz + sz + 1024;
1221: cp = realloc(f->map, f->mapmaxsz);
1222: if (NULL == cp)
1223: texiabort(p, NULL);
1224: f->map = cp;
1225: }
1.8 kristaps 1226:
1.15 kristaps 1227: f->insplice += sz;
1.21 kristaps 1228: memmove(f->map + pos + sz, f->map + pos, f->mapsz - pos);
1229: memcpy(f->map + pos, buf, sz);
1.14 kristaps 1230: f->mapsz += sz;
1.8 kristaps 1231: }
1232:
1233: /*
1.1 kristaps 1234: * Parse a block sequence until we have the "@end endtoken" command
1235: * invocation.
1236: * This will return immediately at EOF.
1237: */
1238: void
1.14 kristaps 1239: parseto(struct texi *p, size_t *pos, const char *endtoken)
1.1 kristaps 1240: {
1.17 kristaps 1241: size_t end, sv;
1.7 kristaps 1242: enum texicmd cmd;
1243: size_t endtoksz;
1244: struct teximacro *macro;
1.1 kristaps 1245:
1246: endtoksz = strlen(endtoken);
1247: assert(endtoksz > 0);
1248:
1.14 kristaps 1249: while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
1250: switch (BUF(p)[*pos]) {
1.1 kristaps 1251: case ('}'):
1252: if (0 == p->ign)
1253: texiwarn(p, "unexpected \"}\"");
1.14 kristaps 1254: advance(p, pos);
1.1 kristaps 1255: continue;
1256: case ('{'):
1257: if (0 == p->ign)
1258: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1259: advance(p, pos);
1.1 kristaps 1260: continue;
1261: case ('@'):
1262: break;
1263: default:
1.14 kristaps 1264: parseword(p, pos, '\0');
1.1 kristaps 1265: continue;
1266: }
1267:
1.17 kristaps 1268: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1269: cmd = texicmd(p, *pos, &end, ¯o);
1270: advanceto(p, pos, end);
1.1 kristaps 1271: if (TEXICMD_END == cmd) {
1.14 kristaps 1272: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1273: advance(p, pos);
1.1 kristaps 1274: /*
1275: * FIXME: check the full word, not just its
1276: * initial substring!
1277: */
1.14 kristaps 1278: if (BUFSZ(p) - *pos >= endtoksz && 0 == strncmp
1279: (&BUF(p)[*pos], endtoken, endtoksz)) {
1280: advanceeoln(p, pos, 0);
1.1 kristaps 1281: break;
1282: }
1283: if (0 == p->ign)
1284: texiwarn(p, "unexpected \"end\"");
1.14 kristaps 1285: advanceeoln(p, pos, 0);
1.1 kristaps 1286: continue;
1.7 kristaps 1287: }
1288: if (NULL != macro)
1.17 kristaps 1289: texiexecmacro(p, macro, sv, pos);
1.7 kristaps 1290: if (TEXICMD__MAX == cmd)
1291: continue;
1292: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1293: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1294: }
1.30 ! kristaps 1295:
! 1296: if (*pos == BUFSZ(p))
! 1297: texiwarn(p, "EOF expecting \"%s\" end\n", endtoken);
1.1 kristaps 1298: }
1299:
1300: /*
1.12 kristaps 1301: * Like parsefile() but used for reading from stdandard input.
1302: * This can only be called for the first file!
1303: */
1304: void
1305: parsestdin(struct texi *p)
1306: {
1307: struct texifile *f;
1308: ssize_t ssz;
1309:
1310: assert(0 == p->filepos);
1311: f = &p->files[p->filepos];
1312: memset(f, 0, sizeof(struct texifile));
1313:
1314: f->type = TEXISRC_STDIN;
1315: f->name = "<stdin>";
1316:
1.14 kristaps 1317: for (f->mapsz = 0; ; f->mapsz += (size_t)ssz) {
1318: if (f->mapsz == f->mapmaxsz) {
1319: if (f->mapmaxsz == (1U << 31))
1.12 kristaps 1320: texierr(p, "stdin buffer too long");
1.14 kristaps 1321: f->mapmaxsz = f->mapmaxsz > 65536 / 2 ?
1322: 2 * f->mapmaxsz : 65536;
1323: f->map = realloc(f->map, f->mapmaxsz);
1.12 kristaps 1324: if (NULL == f->map)
1325: texiabort(p, NULL);
1326: }
1.14 kristaps 1327: ssz = read(STDIN_FILENO, f->map +
1328: (int)f->mapsz, f->mapmaxsz - f->mapsz);
1.12 kristaps 1329: if (0 == ssz)
1330: break;
1331: else if (-1 == ssz)
1332: texiabort(p, NULL);
1333: }
1334:
1335: p->filepos++;
1.14 kristaps 1336: parseeof(p);
1.12 kristaps 1337: texifilepop(p);
1338: }
1339:
1340: /*
1.1 kristaps 1341: * Memory-map the file "fname" and begin parsing it unless "parse" is
1342: * zero, in which case we just dump the file to stdout (making sure it
1343: * doesn't trip up mdoc(7) along the way).
1344: * This can be called in a nested context.
1345: */
1346: void
1347: parsefile(struct texi *p, const char *fname, int parse)
1348: {
1349: struct texifile *f;
1350: int fd;
1351: struct stat st;
1352: size_t i;
1.14 kristaps 1353: char *map;
1.1 kristaps 1354:
1.5 kristaps 1355: if (64 == p->filepos)
1.6 kristaps 1356: texierr(p, "too many open files");
1.1 kristaps 1357: f = &p->files[p->filepos];
1358: memset(f, 0, sizeof(struct texifile));
1359:
1.12 kristaps 1360: f->type = TEXISRC_FILE;
1.1 kristaps 1361: f->name = fname;
1362: if (-1 == (fd = open(fname, O_RDONLY, 0))) {
1363: texiabort(p, fname);
1364: } else if (-1 == fstat(fd, &st)) {
1365: close(fd);
1366: texiabort(p, fname);
1367: }
1368:
1.14 kristaps 1369: f->mapsz = f->mapmaxsz = st.st_size;
1370: map = mmap(NULL, f->mapsz,
1.1 kristaps 1371: PROT_READ, MAP_SHARED, fd, 0);
1372: close(fd);
1373:
1.14 kristaps 1374: if (MAP_FAILED == map)
1.1 kristaps 1375: texiabort(p, fname);
1376:
1377: if ( ! parse) {
1.13 kristaps 1378: for (i = 0; i < f->mapsz; i++)
1.14 kristaps 1379: texiputchar(p, map[i]);
1.13 kristaps 1380: if (p->outcol)
1381: texiputchar(p, '\n');
1.14 kristaps 1382: munmap(map, f->mapsz);
1383: return;
1384: }
1385:
1386: p->filepos++;
1387: f->map = malloc(f->mapsz);
1388: memcpy(f->map, map, f->mapsz);
1389: munmap(map, f->mapsz);
1390: parseeof(p);
1.1 kristaps 1391: texifilepop(p);
1392: }
1393:
1.2 kristaps 1394: /*
1395: * Look up the value to a stored pair's value starting in "buf" from
1396: * start to end.
1397: * Return the pointer to the value memory, which can be NULL if the
1398: * pointer key does not exist.
1399: * The pointer can point to NULL if the value has been unset.
1400: */
1401: static char **
1.14 kristaps 1402: valuequery(const struct texi *p, size_t start, size_t end)
1.2 kristaps 1403: {
1404: size_t i, sz, len;
1405:
1406: assert(end >= start);
1407: /* Ignore zero-length. */
1408: if (0 == (len = (end - start)))
1409: return(NULL);
1410: for (i = 0; i < p->valsz; i++) {
1411: sz = strlen(p->vals[i].key);
1412: if (sz != len)
1413: continue;
1.14 kristaps 1414: if (0 == strncmp(p->vals[i].key, &BUF(p)[start], len))
1.2 kristaps 1415: return(&p->vals[i].value);
1416: }
1417: return(NULL);
1418: }
1419:
1420: /*
1421: * Parse a key until the end of line, e.g., @clear foo\n, and return the
1422: * pointer to its value via valuequery().
1423: */
1424: static char **
1.14 kristaps 1425: valuelquery(struct texi *p, size_t *pos)
1.2 kristaps 1426: {
1427: size_t start, end;
1428: char **ret;
1429:
1.14 kristaps 1430: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1431: advance(p, pos);
1432: if (*pos == BUFSZ(p))
1.2 kristaps 1433: return(NULL);
1.14 kristaps 1434: for (start = end = *pos; end < BUFSZ(p); end++)
1435: if ('\n' == BUF(p)[end])
1.2 kristaps 1436: break;
1.14 kristaps 1437: advanceto(p, pos, end);
1438: if (*pos < BUFSZ(p)) {
1439: assert('\n' == BUF(p)[*pos]);
1440: advance(p, pos);
1.2 kristaps 1441: }
1.14 kristaps 1442: if (NULL == (ret = valuequery(p, start, end)))
1.2 kristaps 1443: return(NULL);
1444: return(ret);
1445: }
1446:
/*
 * Unset the value whose key runs to the end of the current line.
 * The stored value is freed and its slot set to NULL.
 * Does nothing if the key lookup fails.
 */
void
valuelclear(struct texi *p, size_t *pos)
{
	char	**slot;

	slot = valuelquery(p, pos);
	if (NULL != slot) {
		free(*slot);
		*slot = NULL;
	}
}
1457:
/*
 * Look up the value whose key runs to the end of the current line.
 * Returns NULL if the key lookup fails; otherwise returns the stored
 * value, which is itself NULL if it has been unset.
 */
const char *
valuellookup(struct texi *p, size_t *pos)
{
	char	**slot;

	slot = valuelquery(p, pos);
	return(NULL == slot ? NULL : *slot);
}
1467:
1468: /*
1469: * Parse a key from a bracketed string, e.g., @value{foo}, and return
1470: * the pointer to its value.
1471: * If the returned pointer is NULL, either there was no string within
1472: * the brackets (or no brackets), or the value was not found, or the
1473: * value had previously been unset.
1474: */
1475: const char *
1.14 kristaps 1476: valueblookup(struct texi *p, size_t *pos)
1.2 kristaps 1477: {
1478: size_t start, end;
1479: char **ret;
1480:
1.14 kristaps 1481: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1482: advance(p, pos);
1483: if (*pos == BUFSZ(p) || '{' != BUF(p)[*pos])
1.2 kristaps 1484: return(NULL);
1.14 kristaps 1485: advance(p, pos);
1486: for (start = end = *pos; end < BUFSZ(p); end++)
1487: if ('}' == BUF(p)[end])
1.2 kristaps 1488: break;
1.14 kristaps 1489: advanceto(p, pos, end);
1490: if (*pos < BUFSZ(p)) {
1491: assert('}' == BUF(p)[*pos]);
1492: advance(p, pos);
1.2 kristaps 1493: }
1.14 kristaps 1494: if (NULL == (ret = valuequery(p, start, end)))
1.2 kristaps 1495: return(NULL);
1496: return(*ret);
1497: }
1498:
1499: void
1500: valueadd(struct texi *p, char *key, char *val)
1501: {
1502: size_t i;
1503:
1504: assert(NULL != key);
1505: assert(NULL != val);
1506:
1507: for (i = 0; i < p->valsz; i++)
1508: if (0 == strcmp(p->vals[i].key, key))
1509: break;
1510:
1511: if (i < p->valsz) {
1512: free(key);
1513: free(p->vals[i].value);
1514: p->vals[i].value = val;
1515: } else {
1.4 kristaps 1516: /* FIXME: reallocarray() */
1.2 kristaps 1517: p->vals = realloc(p->vals,
1518: (p->valsz + 1) *
1519: sizeof(struct texivalue));
1.4 kristaps 1520: if (NULL == p->vals)
1521: texiabort(p, NULL);
1.2 kristaps 1522: p->vals[p->valsz].key = key;
1523: p->vals[p->valsz].value = val;
1524: p->valsz++;
1525: }
1.7 kristaps 1526: }
1527:
1528: /*
1529: * Take the arguments to a macro, e.g., @foo{bar, baz, xyzzy} (or the
1530: * declaration form, @macro foo {arg1, ...}) and textually convert it to
1531: * an array of arguments of size "argsz".
1532: * These need to be freed individually and as a whole.
1533: * NOTE: this will puke on @, or @} macros, which can trick it into
1534: * stopping argument parsing earlier.
1535: * Ergo, textual: this doesn't interpret the arguments in any way.
1536: */
1537: char **
1.14 kristaps 1538: argparse(struct texi *p, size_t *pos, size_t *argsz, size_t hint)
1.7 kristaps 1539: {
1540: char **args;
1541: size_t start, end, stack;
1542:
1.14 kristaps 1543: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1544: advance(p, pos);
1.7 kristaps 1545:
1546: args = NULL;
1547: *argsz = 0;
1548:
1.17 kristaps 1549: if (*pos == BUFSZ(p))
1550: return(args);
1551:
1.14 kristaps 1552: if ('{' != BUF(p)[*pos] && hint) {
1.10 kristaps 1553: /*
1554: * Special case: if we encounter an unbracketed argument
1555: * and we're being invoked with non-zero arguments
1556: * (versus being set, i.e., hint>0), then parse until
1557: * the end of line.
1558: */
1559: *argsz = 1;
1560: args = calloc(1, sizeof(char *));
1561: if (NULL == args)
1562: texiabort(p, NULL);
1563: start = *pos;
1.14 kristaps 1564: while (*pos < BUFSZ(p)) {
1565: if ('\n' == BUF(p)[*pos])
1.10 kristaps 1566: break;
1.14 kristaps 1567: advance(p, pos);
1.10 kristaps 1568: }
1569: args[0] = malloc(*pos - start + 1);
1.14 kristaps 1570: memcpy(args[0], &BUF(p)[start], *pos - start);
1.10 kristaps 1571: args[0][*pos - start] = '\0';
1.14 kristaps 1572: if (*pos < BUFSZ(p) && '\n' == BUF(p)[*pos])
1573: advance(p, pos);
1.10 kristaps 1574: return(args);
1.14 kristaps 1575: } else if ('{' != BUF(p)[*pos])
1.7 kristaps 1576: return(args);
1.17 kristaps 1577:
1578: assert('{' == BUF(p)[*pos]);
1.7 kristaps 1579:
1580: /* Parse til the closing '}', putting into the array. */
1.14 kristaps 1581: advance(p, pos);
1582: while (*pos < BUFSZ(p)) {
1583: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1584: advance(p, pos);
1.7 kristaps 1585: start = *pos;
1586: stack = 0;
1.14 kristaps 1587: while (*pos < BUFSZ(p)) {
1.7 kristaps 1588: /*
1589: * According to the manual, commas within
1590: * embedded commands are escaped.
1591: * We keep track of embedded-ness in the "stack"
1592: * state anyway, so this is free.
1593: */
1.14 kristaps 1594: if (',' == BUF(p)[*pos] && 0 == stack && 1 != hint)
1.7 kristaps 1595: break;
1.14 kristaps 1596: else if (0 == stack && '}' == BUF(p)[*pos])
1.7 kristaps 1597: break;
1.14 kristaps 1598: else if (0 != stack && '}' == BUF(p)[*pos])
1.7 kristaps 1599: stack--;
1.14 kristaps 1600: else if ('{' == BUF(p)[*pos])
1.7 kristaps 1601: stack++;
1.14 kristaps 1602: advance(p, pos);
1.7 kristaps 1603: }
1604: if (stack)
1605: texiwarn(p, "unterminated macro "
1606: "in macro arguments");
1.14 kristaps 1607: if ((end = *pos) == BUFSZ(p))
1.7 kristaps 1608: break;
1609: /* Test for zero-length '{ }'. */
1.14 kristaps 1610: if (start == end && '}' == BUF(p)[*pos] && 0 == *argsz)
1.7 kristaps 1611: break;
1612: /* FIXME: use reallocarray. */
1613: args = realloc
1614: (args, sizeof(char *) *
1615: (*argsz + 1));
1616: if (NULL == args)
1617: texiabort(p, NULL);
1618: args[*argsz] = malloc(end - start + 1);
1619: if (NULL == args[*argsz])
1620: texiabort(p, NULL);
1621: memcpy(args[*argsz],
1.14 kristaps 1622: &BUF(p)[start], end - start);
1.7 kristaps 1623: args[*argsz][end - start] = '\0';
1624: (*argsz)++;
1.14 kristaps 1625: if ('}' == BUF(p)[*pos])
1.7 kristaps 1626: break;
1.14 kristaps 1627: advance(p, pos);
1.7 kristaps 1628: }
1629:
1.14 kristaps 1630: if (*pos == BUFSZ(p))
1.7 kristaps 1631: texierr(p, "unterminated arguments");
1.14 kristaps 1632: assert('}' == BUF(p)[*pos]);
1633: advance(p, pos);
1.7 kristaps 1634: return(args);
1.2 kristaps 1635: }
1.20 kristaps 1636:
1637: /*
1638: * If we're printing chapters, then do some naviation here and then
1639: * close our outfile.
1640: * I want to call this the SEE ALSO section, but that's not really what
1641: * it is: we'll refer to the "initial" (top) node and the next and
1642: * previous chapters.
1643: */
1644: void
1645: teximdocclose(struct texi *p, int last)
1646: {
1647: char buf[PATH_MAX];
1648:
1649: if (NULL == p->chapters || 0 == p->chapnum)
1650: return;
1651:
1652: teximacro(p, "Sh INFO NAVIGATION");
1653:
1654: /* Print a reference to the "top" node. */
1655: if (p->chapnum > 1) {
1.22 kristaps 1656: texiputchars(p, "Top node,");
1.20 kristaps 1657: snprintf(buf, sizeof(buf), "node1 7");
1658: teximacroopen(p, "Xr ");
1659: texiputchars(p, buf);
1.22 kristaps 1660: texiputchars(p, " ;");
1.20 kristaps 1661: teximacroclose(p);
1662: }
1663:
1664: /* Print a reference to the previous node. */
1665: if (p->chapnum > 2) {
1.22 kristaps 1666: texiputchars(p, "previous node,");
1.20 kristaps 1667: snprintf(buf, sizeof(buf),
1668: "node%zu 7", p->chapnum - 1);
1669: teximacroopen(p, "Xr ");
1670: texiputchars(p, buf);
1671: if ( ! last)
1.22 kristaps 1672: texiputchars(p, " ;");
1.20 kristaps 1673: teximacroclose(p);
1674: }
1675:
1676: /* Print a reference to the next node. */
1677: if ( ! last) {
1.22 kristaps 1678: if (1 == p->chapnum)
1679: texiputchars(p, "Next node,");
1680: else
1681: texiputchars(p, "next node,");
1.20 kristaps 1682: snprintf(buf, sizeof(buf),
1683: "node%zu 7", p->chapnum + 1);
1684: teximacroopen(p, "Xr ");
1685: texiputchars(p, buf);
1686: teximacroclose(p);
1687: }
1688:
1689: fclose(p->outfile);
1690: }
1691:
1692: /*
1693: * Open a mdoc(7) context.
1694: * If we're printing chapters, then open the outfile here, too.
1695: * Otherwise just print the mdoc(7) prologue.
1696: */
1697: void
1.21 kristaps 1698: teximdocopen(struct texi *p, size_t *pos)
1.20 kristaps 1699: {
1700: const char *cp;
1701: time_t t;
1702: char date[32];
1703: char fname[PATH_MAX];
1704:
1705: if (NULL != p->chapters) {
1706: snprintf(fname, sizeof(fname), "%s/node%zu.7",
1707: p->chapters, ++p->chapnum);
1708: p->outfile = fopen(fname, "w");
1709: if (NULL == p->outfile)
1710: texiabort(p, fname);
1711: }
1712:
1713: /*
1714: * Here we print our standard mdoc(7) prologue.
1715: * We use the title set with @settitle for the `Nd' description
1716: * and the source document filename (the first one as invoked on
1717: * the command line) for the title.
1718: * The date is set to the current date.
1719: */
1720: t = time(NULL);
1721: strftime(date, sizeof(date), "%F", localtime(&t));
1722:
1.30 ! kristaps 1723: p->seenvs = -1;
1.20 kristaps 1724: teximacroopen(p, "Dd");
1725: texiputchars(p, date);
1726: teximacroclose(p);
1727: teximacroopen(p, "Dt");
1728: for (cp = p->title; '\0' != *cp; cp++)
1729: texiputchar(p, toupper((unsigned int)*cp));
1730: texiputchars(p, " 7");
1731: teximacroclose(p);
1732: teximacro(p, "Os");
1733: teximacro(p, "Sh NAME");
1734: teximacroopen(p, "Nm");
1735: for (cp = p->title; '\0' != *cp; cp++)
1736: texiputchar(p, *cp);
1737: teximacroclose(p);
1738: teximacroopen(p, "Nd");
1.21 kristaps 1739: /*
1740: * The subtitle `Nd' can consist of arbitrary macros, so paste
1741: * it and parse to the end of the line.
1742: */
1743: if (NULL != p->subtitle) {
1744: texisplice(p, p->subtitle, strlen(p->subtitle), *pos);
1745: parseeoln(p, pos);
1746: } else
1.20 kristaps 1747: texiputchars(p, "Unknown description");
1748: teximacroclose(p);
1749: }
1750:
CVSweb