Annotation of texi2mdoc/util.c, Revision 1.29
1.29 ! kristaps 1: /* $Id: util.c,v 1.28 2015/03/05 15:18:13 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2015 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/mman.h>
18: #include <sys/stat.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <limits.h>
24: #include <stdarg.h>
25: #include <stdio.h>
26: #include <stdlib.h>
27: #include <string.h>
28: #include <time.h>
29: #include <unistd.h>
30:
31: #include "extern.h"
32:
/*
 * NULL-terminated table of mdoc(7) macro names.
 * parseword() scans this table so that ordinary text appearing on a
 * macro line can be escaped when it happens to spell a macro name.
 * Every entry MUST be exactly two or three characters long.
 */
static const char *const mdocs[] = {
	"Ap",  "Dd",  "Dt",  "Os",  "Sh",  "Ss",  "Pp",  "D1",
	"Dl",  "Bd",  "Ed",  "Bl",  "El",  "It",  "Ad",  "An",
	"Ar",  "Cd",  "Cm",  "Dv",  "Er",  "Ev",  "Ex",  "Fa",
	"Fd",  "Fl",  "Fn",  "Ft",  "Ic",  "In",  "Li",  "Nd",
	"Nm",  "Op",  "Ot",  "Pa",  "Rv",  "St",  "Va",  "Vt",
	"Xr",  "%A",  "%B",  "%D",  "%I",  "%J",  "%N",  "%O",
	"%P",  "%R",  "%T",  "%V",  "Ac",  "Ao",  "Aq",  "At",
	"Bc",  "Bf",  "Bo",  "Bq",  "Bsx", "Bx",  "Db",  "Dc",
	"Do",  "Dq",  "Ec",  "Ef",  "Em",  "Eo",  "Fx",  "Ms",
	"No",  "Ns",  "Nx",  "Ox",  "Pc",  "Pf",  "Po",  "Pq",
	"Qc",  "Ql",  "Qo",  "Qq",  "Re",  "Rs",  "Sc",  "So",
	"Sq",  "Sm",  "Sx",  "Sy",  "Tn",  "Ux",  "Xc",  "Xo",
	"Fo",  "Fc",  "Oo",  "Oc",  "Bk",  "Ek",  "Bt",  "Hf",
	"Fr",  "Ud",  "Lb",  "Lp",  "Lk",  "Mt",  "Brq", "Bro",
	"Brc", "%C",  "Es",  "En",  "Dx",  "%Q",  "br",  "sp",
	"%U",  "Ta",  "ll",
	NULL
};
! 70:
! 71: /*
1.1 kristaps 72: * Unmap the top-most file in the stack of files currently opened (that
73: * is, nested calls to parsefile()).
74: */
75: void
76: texifilepop(struct texi *p)
77: {
78: struct texifile *f;
79:
80: assert(p->filepos > 0);
81: f = &p->files[--p->filepos];
1.14 kristaps 82: free(f->map);
1.1 kristaps 83: }
84:
1.7 kristaps 85: static void
86: teximacrofree(struct teximacro *p)
87: {
88: size_t i;
89:
90: for (i = 0; i < p->argsz; i++)
91: free(p->args[i]);
92:
93: free(p->args);
94: free(p->key);
95: free(p->value);
96: }
97:
98: static void
99: texivaluefree(struct texivalue *p)
100: {
101:
102: free(p->key);
103: free(p->value);
104: }
105:
1.1 kristaps 106: /*
107: * Unmap all files that we're currently using and free all resources
108: * that we've allocated during the parse.
109: * The utility should exit(...) after this is called.
110: */
111: void
112: texiexit(struct texi *p)
113: {
114: size_t i;
115:
116: /* Make sure we're newline-terminated. */
117: if (p->outcol)
1.20 kristaps 118: fputc('\n', p->outfile);
119: if (NULL != p->chapters)
120: teximdocclose(p, 1);
1.1 kristaps 121:
122: /* Unmap all files. */
123: while (p->filepos > 0)
124: texifilepop(p);
125:
1.7 kristaps 126: for (i = 0; i < p->macrosz; i++)
127: teximacrofree(&p->macros[i]);
1.1 kristaps 128: for (i = 0; i < p->dirsz; i++)
129: free(p->dirs[i]);
1.4 kristaps 130: for (i = 0; i < p->indexsz; i++)
131: free(p->indexs[i]);
1.7 kristaps 132: for (i = 0; i < p->valsz; i++)
133: texivaluefree(&p->vals[i]);
1.4 kristaps 134:
1.7 kristaps 135: free(p->macros);
1.1 kristaps 136: free(p->vals);
1.4 kristaps 137: free(p->indexs);
1.1 kristaps 138: free(p->dirs);
139: free(p->subtitle);
140: free(p->title);
1.26 kristaps 141: free(p->copying);
1.1 kristaps 142: }
143:
/*
 * Fatal system error: report it, release all parse resources with
 * texiexit(), and terminate with a failure status.
 * "errstring" is handed to perror(3) as-is (it may be NULL).
 * This function does not return.
 */
void
texiabort(struct texi *p, const char *errstring)
{
	/* Report first: the cleanup below may disturb errno. */
	perror(errstring);

	texiexit(p);
	exit(EXIT_FAILURE);
}
156:
157: /*
158: * Print a generic warning message (to stderr) tied to our current
159: * location in the parse sequence.
160: */
161: void
162: texiwarn(const struct texi *p, const char *fmt, ...)
163: {
1.15 kristaps 164: va_list ap;
165: const struct texifile *f;
166:
167: f = &p->files[p->filepos - 1];
168:
169: if (f->insplice)
170: fprintf(stderr, "%s:%zu:%zu (%zuB left in splice): "
171: "warning: ", f->name, f->line + 1,
172: f->col + 1, f->insplice);
173: else
174: fprintf(stderr, "%s:%zu:%zu: warning: ",
175: f->name, f->line + 1, f->col + 1);
1.1 kristaps 176:
177: va_start(ap, fmt);
178: vfprintf(stderr, fmt, ap);
179: va_end(ap);
180: fputc('\n', stderr);
181: }
182:
183: /*
184: * Print an error message (to stderr) tied to our current location in
185: * the parse sequence, invoke texiexit(), then die.
186: */
187: void
188: texierr(struct texi *p, const char *fmt, ...)
189: {
1.15 kristaps 190: va_list ap;
191: struct texifile *f;
192:
193: f = &p->files[p->filepos - 1];
194:
195: if (f->insplice)
196: fprintf(stderr, "%s:%zu:%zu: (%zuB left in splice): "
197: "error: ", f->name, f->line + 1,
198: f->col + 1, f->insplice);
199: else
200: fprintf(stderr, "%s:%zu:%zu: error: ",
201: f->name, f->line + 1, f->col + 1);
1.1 kristaps 202:
203: va_start(ap, fmt);
204: vfprintf(stderr, fmt, ap);
205: va_end(ap);
206: fputc('\n', stderr);
207: texiexit(p);
208: exit(EXIT_FAILURE);
209: }
210:
211: /*
212: * Put a single data character to the output if we're not ignoring.
1.13 kristaps 213: * Escape starting a line with a control character and slashes.
1.1 kristaps 214: */
215: void
216: texiputchar(struct texi *p, char c)
217: {
218:
219: if (p->ign)
220: return;
221: if ('.' == c && 0 == p->outcol)
1.20 kristaps 222: fputs("\\&", p->outfile);
1.10 kristaps 223: if ('\'' == c && 0 == p->outcol)
1.20 kristaps 224: fputs("\\&", p->outfile);
1.1 kristaps 225:
1.23 kristaps 226: if (p->uppercase)
227: fputc(toupper((unsigned int)c), p->outfile);
228: else
229: fputc(c, p->outfile);
1.13 kristaps 230: if ('\\' == c)
1.20 kristaps 231: fputc('e', p->outfile);
1.1 kristaps 232: if ('\n' == c) {
233: p->outcol = 0;
234: p->seenws = 0;
235: } else
236: p->outcol++;
237: }
238:
239: /*
1.13 kristaps 240: * Put an opaque series of characters.
241: * Characters starting a line with a control character are escaped, but
242: * that's it, so don't use this for non-controlled sequences of text.
1.1 kristaps 243: */
244: void
245: texiputchars(struct texi *p, const char *s)
246: {
247:
1.13 kristaps 248: if (p->ign)
249: return;
250: if ('.' == *s && 0 == p->outcol)
1.20 kristaps 251: fputs("\\&", p->outfile);
1.13 kristaps 252: if ('\'' == *s && 0 == p->outcol)
1.20 kristaps 253: fputs("\\&", p->outfile);
1.23 kristaps 254: if (p->uppercase)
255: for ( ; '\0' != *s; s++)
256: p->outcol += fputc(toupper
257: ((unsigned int)*s), p->outfile);
258: else
259: p->outcol += fputs(s, p->outfile);
1.9 kristaps 260: }
261:
262: /*
263: * This puts all characters onto the output stream but makes sure to
264: * escape mdoc(7) slashes.
1.14 kristaps 265: * FIXME: useless.
1.9 kristaps 266: */
267: void
1.14 kristaps 268: texiputbuf(struct texi *p, size_t start, size_t end)
1.9 kristaps 269: {
270:
1.14 kristaps 271: for ( ; start < end; start++)
272: texiputchar(p, BUF(p)[start]);
1.1 kristaps 273: }
274:
275: /*
276: * Close an mdoc(7) macro opened with teximacroopen().
277: * If there are no more macros on the line, prints a newline.
278: */
279: void
280: teximacroclose(struct texi *p)
281: {
282:
283: if (p->ign)
284: return;
285:
286: if (0 == --p->outmacro) {
1.20 kristaps 287: fputc('\n', p->outfile);
1.1 kristaps 288: p->outcol = p->seenws = 0;
289: }
1.27 kristaps 290: p->seenvs = 0;
1.1 kristaps 291: }
292:
293: /*
294: * Open a mdoc(7) macro.
295: * This is used for line macros, e.g., Qq [foo bar baz].
296: * It can be invoked for nested macros, e.g., Qq Li foo .
297: * TODO: flush-right punctuation (e.g., parenthesis).
298: */
299: void
300: teximacroopen(struct texi *p, const char *s)
301: {
302: int rc;
303:
304: if (p->ign)
305: return;
306:
307: if (p->outcol && 0 == p->outmacro) {
1.20 kristaps 308: fputc('\n', p->outfile);
1.1 kristaps 309: p->outcol = 0;
310: }
311:
312: if (0 == p->outmacro)
1.20 kristaps 313: fputc('.', p->outfile);
1.1 kristaps 314: else
1.20 kristaps 315: fputc(' ', p->outfile);
1.1 kristaps 316:
1.20 kristaps 317: if (EOF != (rc = fputs(s, p->outfile)))
1.1 kristaps 318: p->outcol += rc;
319:
1.20 kristaps 320: fputc(' ', p->outfile);
1.1 kristaps 321: p->outcol++;
322: p->outmacro++;
323: p->seenws = 0;
1.27 kristaps 324: p->seenvs = 0;
1.1 kristaps 325: }
326:
327: /*
328: * Put a stadnalone mdoc(7) command with the trailing newline.
329: */
330: void
331: teximacro(struct texi *p, const char *s)
332: {
333:
334: if (p->ign)
335: return;
336:
337: if (p->outmacro)
338: texierr(p, "\"%s\" in open line scope!?", s);
339: if (p->literal)
340: texierr(p, "\"%s\" in a literal scope!?", s);
341:
342: if (p->outcol)
1.20 kristaps 343: fputc('\n', p->outfile);
1.1 kristaps 344:
1.20 kristaps 345: fputc('.', p->outfile);
346: fputs(s, p->outfile);
347: fputc('\n', p->outfile);
1.1 kristaps 348: p->outcol = p->seenws = 0;
1.27 kristaps 349: p->seenvs = 0;
1.1 kristaps 350: }
351:
352: /*
353: * Introduce vertical space during normal (non-macro) input.
354: */
355: void
356: texivspace(struct texi *p)
357: {
358:
1.27 kristaps 359: if (TEXILIST_TABLE != p->list)
360: teximacro(p, "Pp");
1.1 kristaps 361: }
362:
363: /*
364: * Advance by a single byte in the input stream, adjusting our location
365: * in the current input file.
366: */
367: void
1.14 kristaps 368: advance(struct texi *p, size_t *pos)
1.1 kristaps 369: {
1.15 kristaps 370: struct texifile *f;
1.1 kristaps 371:
1.15 kristaps 372: f = &p->files[p->filepos - 1];
373:
374: if (0 == f->insplice) {
375: if ('\n' == BUF(p)[*pos]) {
376: f->line++;
377: f->col = 0;
378: } else
379: f->col++;
1.17 kristaps 380: } else {
1.15 kristaps 381: --f->insplice;
1.17 kristaps 382: if (0 == f->insplice)
383: f->depth = 0;
384: }
1.1 kristaps 385:
386: (*pos)++;
387: }
388:
389: /*
390: * It's common to wait punctuation to float on the right side of macro
391: * lines in mdoc(7), e.g., ".Em hello ) ."
392: * This function does so, and should be called before teximacroclose().
393: * It will detect that it's the last in the nested macros and
394: * appropriately flush-left punctuation alongside the macro.
395: */
396: void
1.14 kristaps 397: texipunctuate(struct texi *p, size_t *pos)
1.1 kristaps 398: {
399: size_t start, end;
400:
401: if (1 != p->outmacro)
402: return;
403:
1.14 kristaps 404: for (start = end = *pos; end < BUFSZ(p); end++) {
405: switch (BUF(p)[end]) {
1.1 kristaps 406: case (','):
407: case (')'):
408: case ('.'):
409: case ('"'):
410: case (':'):
1.22 kristaps 411: case (';'):
1.1 kristaps 412: case ('!'):
413: case ('?'):
414: continue;
415: default:
416: break;
417: }
418: break;
419: }
420: if (end == *pos)
421: return;
1.14 kristaps 422: if (end + 1 == BUFSZ(p) || ' ' == BUF(p)[end] ||
423: '\n' == BUF(p)[end]) {
1.1 kristaps 424: for ( ; start < end; start++) {
425: texiputchar(p, ' ');
1.14 kristaps 426: texiputchar(p, BUF(p)[start]);
427: advance(p, pos);
1.1 kristaps 428: }
429: }
430: }
431:
432: /*
433: * Advance to the next non-whitespace word in the input stream.
434: * If we're in literal mode, then print all of the whitespace as we're
435: * doing so.
436: */
437: static size_t
1.14 kristaps 438: advancenext(struct texi *p, size_t *pos)
1.1 kristaps 439: {
440:
441: if (p->literal) {
1.14 kristaps 442: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos])) {
443: texiputchar(p, BUF(p)[*pos]);
444: advance(p, pos);
1.1 kristaps 445: }
446: return(*pos);
447: }
448:
1.14 kristaps 449: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos])) {
1.1 kristaps 450: p->seenws = 1;
1.14 kristaps 451: advance(p, pos);
1.1 kristaps 452: }
453: return(*pos);
454: }
455:
456: /*
457: * Advance to the EOLN in the input stream.
1.22 kristaps 458: * This will skip over '@' markers in an effort to ignore escaped
459: * newlines.
1.1 kristaps 460: */
461: size_t
1.14 kristaps 462: advanceeoln(struct texi *p, size_t *pos, int consumenl)
1.1 kristaps 463: {
464:
1.22 kristaps 465: while (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos]) {
466: if ('@' == BUF(p)[*pos])
467: advance(p, pos);
1.14 kristaps 468: advance(p, pos);
1.22 kristaps 469: }
1.14 kristaps 470: if (*pos < BUFSZ(p) && consumenl)
471: advance(p, pos);
1.1 kristaps 472: return(*pos);
473: }
474:
/*
 * Step byte-by-byte with advance() until "pos" reaches "end", an
 * absolute position in the current buffer that must not lie before the
 * current position.
 */
void
advanceto(struct texi *p, size_t *pos, size_t end)
{

	assert(end >= *pos);
	for (;;) {
		if (*pos >= end)
			break;
		advance(p, pos);
	}
}
487:
1.7 kristaps 488: static void
1.17 kristaps 489: texiexecmacro(struct texi *p, struct teximacro *m, size_t sv, size_t *pos)
1.7 kristaps 490: {
1.11 kristaps 491: size_t valsz, realsz, aasz, asz,
492: ssz, i, j, k, start, end;
493: char *val;
494: char **args;
495: const char *cp;
1.7 kristaps 496:
1.17 kristaps 497: /* Disregard empty macros. */
1.22 kristaps 498: if (0 == (valsz = realsz = strlen(m->value))) {
499: args = argparse(p, pos, &asz, m->argsz);
500: for (i = 0; i < asz; i++)
501: free(args[i]);
502: free(args);
1.17 kristaps 503: return;
1.22 kristaps 504: }
1.17 kristaps 505:
506: /*
507: * This is important: it protect us from macros that invoke more
508: * macros, possibly going on infinitely.
509: * We use "sv" instead of the current position because we might
510: * be invoked at the end of the macro (i.e., insplice == 0).
511: * The "sv" value was initialised at the start of the macro.
512: */
513: if (sv > 0)
1.24 kristaps 514: if (++p->files[p->filepos - 1].depth > 64)
1.17 kristaps 515: texierr(p, "maximium recursive depth");
516:
1.14 kristaps 517: args = argparse(p, pos, &asz, m->argsz);
1.7 kristaps 518: if (asz != m->argsz)
519: texiwarn(p, "invalid macro argument length");
520: aasz = asz < m->argsz ? asz : m->argsz;
521:
522: if (0 == aasz) {
1.21 kristaps 523: texisplice(p, m->value, valsz, *pos);
1.7 kristaps 524: return;
525: }
526:
527: val = strdup(m->value);
528:
529: for (i = j = 0; i < realsz; i++) {
530: /* Parse blindly til the backslash delimiter. */
531: if ('\\' != m->value[i]) {
532: val[j++] = m->value[i];
533: val[j] = '\0';
534: continue;
535: } else if (i == realsz - 1)
536: texierr(p, "trailing argument name delimiter");
537:
538: /* Double-backslash is escaped. */
539: if ('\\' == m->value[i + 1]) {
540: val[j++] = m->value[i++];
541: val[j] = '\0';
542: continue;
543: }
544:
545: assert('\\' == m->value[i] && i < realsz - 1);
546:
547: /* Parse to terminating delimiter. */
548: /* FIXME: embedded, escaped delimiters? */
549: for (start = end = i + 1; end < realsz; end++)
550: if ('\\' == m->value[end])
551: break;
552: if (end == realsz)
553: texierr(p, "unterminated argument name");
554:
555: for (k = 0; k < aasz; k++) {
556: if ((ssz = strlen(m->args[k])) != (end - start))
557: continue;
558: if (strncmp(&m->value[start], m->args[k], ssz))
559: continue;
560: break;
561: }
562:
563: /*
564: * Argument didn't exist in argument table.
1.14 kristaps 565: * Just ignore it.
1.7 kristaps 566: */
567: if (k == aasz) {
1.14 kristaps 568: i = end;
1.7 kristaps 569: continue;
570: }
571:
572: if (strlen(args[k]) > ssz) {
573: valsz += strlen(args[k]);
574: val = realloc(val, valsz + 1);
575: if (NULL == val)
576: texiabort(p, NULL);
577: }
578:
1.11 kristaps 579: for (cp = args[k]; '\0' != *cp; cp++)
580: val[j++] = *cp;
581:
582: val[j] = '\0';
1.7 kristaps 583: i = end;
584: }
585:
1.21 kristaps 586: texisplice(p, val, strlen(val), *pos);
1.7 kristaps 587:
588: for (i = 0; i < asz; i++)
589: free(args[i]);
590: free(args);
591: free(val);
592: }
593:
1.1 kristaps 594: /*
595: * Output a free-form word in the input stream, progressing to the next
596: * command or white-space.
597: * This also will advance the input stream.
598: */
599: static void
1.14 kristaps 600: parseword(struct texi *p, size_t *pos, char extra)
1.1 kristaps 601: {
1.29 ! kristaps 602: size_t i, end, len;
! 603: int c;
1.1 kristaps 604:
1.25 kristaps 605: /*
1.27 kristaps 606: * If a prior word had a terminating double-newline, then begin
607: * this text block with a `Pp'.
608: * We don't do this if we're in a literal context (we'll print
609: * out the newlines themselves) nor in a `TS' table.
610: */
611: if (p->seenvs && 0 == p->literal && TEXILIST_TABLE != p->list)
612: teximacro(p, "Pp");
613:
614: p->seenvs = 0;
615:
616: /*
1.25 kristaps 617: * Some line control: if we (non-macro, non-literal) already
618: * have more than 72 characters written to the screen, then
619: * output a newline before getting started.
620: */
1.1 kristaps 621: if (p->seenws && 0 == p->outmacro &&
622: p->outcol > 72 && 0 == p->literal)
623: texiputchar(p, '\n');
1.25 kristaps 624:
625: /* Usual padding in the case of seen whitespace. */
1.1 kristaps 626: if (p->seenws && p->outcol && 0 == p->literal)
627: texiputchar(p, ' ');
628:
629: p->seenws = 0;
1.29 ! kristaps 630:
! 631: /*
! 632: * If we're in a macro line, we might want to print text that
! 633: * happens to be the same as an mdoc(7) macro.
! 634: * Obviously, we need to escape these words.
! 635: */
! 636: if (p->outmacro) {
! 637: end = *pos;
! 638: /* Read ahead to get the word length. */
! 639: while (end < BUFSZ(p) && ! ismspace(BUF(p)[end])) {
! 640: switch ((c = BUF(p)[end])) {
! 641: case ('@'):
! 642: case ('}'):
! 643: case ('{'):
! 644: break;
! 645: default:
! 646: if ('\0' != extra && extra == c)
! 647: break;
! 648: end++;
! 649: continue;
! 650: }
! 651: break;
! 652: }
! 653: len = end - *pos;
! 654: /* See if we have a match. */
! 655: for (i = 0; NULL != mdocs[i]; i++) {
! 656: /* All macros are 2 or three letters. */
! 657: if (len < 2 || len > 3)
! 658: continue;
! 659: /* Check the macro word length. */
! 660: if ('\0' == mdocs[i][2] && 2 != len)
! 661: continue;
! 662: else if ('\0' == mdocs[i][3] && 3 != len)
! 663: continue;
! 664: if (strncmp(mdocs[i], &BUF(p)[*pos], len))
! 665: continue;
! 666: texiputchars(p, "\\&");
! 667: break;
! 668: }
! 669: }
1.1 kristaps 670:
1.14 kristaps 671: while (*pos < BUFSZ(p) && ! ismspace(BUF(p)[*pos])) {
672: switch (BUF(p)[*pos]) {
1.1 kristaps 673: case ('@'):
674: case ('}'):
675: case ('{'):
676: return;
677: }
1.14 kristaps 678: if ('\0' != extra && BUF(p)[*pos] == extra)
1.1 kristaps 679: return;
1.28 kristaps 680:
681: if (p->literal) {
682: texiputchar(p, BUF(p)[*pos]);
683: advance(p, pos);
684: continue;
685: }
686:
687: if (*pos < BUFSZ(p) - 2 &&
688: '-' == BUF(p)[*pos] &&
689: '-' == BUF(p)[*pos + 1] &&
690: '-' == BUF(p)[*pos + 2]) {
691: texiputchars(p, "\\(em");
692: advance(p, pos);
693: advance(p, pos);
694: } else if (*pos < BUFSZ(p) - 1 &&
695: '-' == BUF(p)[*pos] &&
696: '-' == BUF(p)[*pos + 1]) {
697: texiputchars(p, "\\(en");
698: advance(p, pos);
699: } else if (*pos < BUFSZ(p) - 1 &&
1.14 kristaps 700: '`' == BUF(p)[*pos] &&
701: '`' == BUF(p)[*pos + 1]) {
1.1 kristaps 702: texiputchars(p, "\\(lq");
1.14 kristaps 703: advance(p, pos);
704: } else if (*pos < BUFSZ(p) - 1 &&
705: '\'' == BUF(p)[*pos] &&
706: '\'' == BUF(p)[*pos + 1]) {
1.1 kristaps 707: texiputchars(p, "\\(rq");
1.14 kristaps 708: advance(p, pos);
1.1 kristaps 709: } else
1.14 kristaps 710: texiputchar(p, BUF(p)[*pos]);
1.28 kristaps 711:
1.14 kristaps 712: advance(p, pos);
1.1 kristaps 713: }
1.25 kristaps 714:
1.27 kristaps 715: if (*pos + 1 < BUFSZ(p) &&
716: '\n' == BUF(p)[*pos] &&
717: '\n' == BUF(p)[*pos + 1])
718: p->seenvs = 1;
719:
1.25 kristaps 720: /*
721: * New sentence, new line:if we (non-macro, non-literal) see a
722: * period at the end of the last printed word, then open a
723: * newline.
724: */
725: if (0 == p->literal && 0 == p->outmacro &&
726: *pos < BUFSZ(p) && '.' == BUF(p)[*pos - 1])
727: texiputchar(p, '\n');
1.1 kristaps 728: }
729:
730: /*
731: * Look up the command at position "pos" in the buffer, returning it (or
732: * TEXICMD__MAX if none found) and setting "end" to be the absolute
733: * index after the command name.
734: */
735: enum texicmd
1.19 kristaps 736: texicmd(const struct texi *p, size_t pos, size_t *end, struct teximacro **macro)
1.1 kristaps 737: {
1.4 kristaps 738: size_t i, len, toksz;
1.1 kristaps 739:
1.14 kristaps 740: assert('@' == BUF(p)[pos]);
1.1 kristaps 741:
1.7 kristaps 742: if (NULL != macro)
743: *macro = NULL;
744:
1.14 kristaps 745: if ((*end = pos) == BUFSZ(p))
1.1 kristaps 746: return(TEXICMD__MAX);
1.14 kristaps 747: else if ((*end = ++pos) == BUFSZ(p))
1.1 kristaps 748: return(TEXICMD__MAX);
749:
750: /* Alphabetic commands are special. */
1.23 kristaps 751: if ( ! isalpha((unsigned int)BUF(p)[pos])) {
1.14 kristaps 752: if ((*end = pos + 1) == BUFSZ(p))
1.1 kristaps 753: return(TEXICMD__MAX);
754: for (i = 0; i < TEXICMD__MAX; i++) {
755: if (1 != texitoks[i].len)
756: continue;
1.14 kristaps 757: if (0 == strncmp(texitoks[i].tok, &BUF(p)[pos], 1))
1.1 kristaps 758: return(i);
759: }
1.14 kristaps 760: texiwarn(p, "bad command: @%c", BUF(p)[pos]);
1.1 kristaps 761: return(TEXICMD__MAX);
762: }
763:
1.4 kristaps 764: /* Scan to the end of the possible command name. */
1.14 kristaps 765: for (*end = pos; *end < BUFSZ(p) && ! ismspace(BUF(p)[*end]); (*end)++)
766: if ((*end > pos && ('@' == BUF(p)[*end] ||
767: '{' == BUF(p)[*end] || '}' == BUF(p)[*end])))
1.1 kristaps 768: break;
769:
1.4 kristaps 770: /* Look for the command. */
1.1 kristaps 771: len = *end - pos;
772: for (i = 0; i < TEXICMD__MAX; i++) {
773: if (len != texitoks[i].len)
774: continue;
1.14 kristaps 775: if (0 == strncmp(texitoks[i].tok, &BUF(p)[pos], len))
1.1 kristaps 776: return(i);
777: }
778:
1.4 kristaps 779: /* Look for it in our indices. */
780: for (i = 0; i < p->indexsz; i++) {
781: toksz = strlen(p->indexs[i]);
782: if (len != 5 + toksz)
783: continue;
1.14 kristaps 784: if (strncmp(&BUF(p)[pos], p->indexs[i], toksz))
1.4 kristaps 785: continue;
1.14 kristaps 786: if (0 == strncmp(&BUF(p)[pos + toksz], "index", 5))
1.7 kristaps 787: return(TEXICMD_USER_INDEX);
788: }
789:
790: for (i = 0; i < p->macrosz; i++) {
791: if (len != strlen(p->macros[i].key))
792: continue;
1.14 kristaps 793: if (strncmp(&BUF(p)[pos], p->macros[i].key, len))
1.7 kristaps 794: continue;
795: if (NULL != macro)
796: *macro = &p->macros[i];
797: return(TEXICMD__MAX);
1.4 kristaps 798: }
799:
1.14 kristaps 800: texiwarn(p, "bad command: @%.*s", (int)len, &BUF(p)[pos]);
1.1 kristaps 801: return(TEXICMD__MAX);
802: }
803:
804: /*
805: * Parse an argument from a bracketed command, e.g., @url{foo, baz}.
806: * Num should be set to the argument we're currently parsing, although
807: * it suffixes for it to be zero or non-zero.
808: * This will return 1 if there are more arguments, 0 otherwise.
809: * This will stop (returning 0) in the event of EOF or if we're not at a
810: * bracket for the zeroth parse.
811: */
812: int
1.14 kristaps 813: parsearg(struct texi *p, size_t *pos, size_t num)
1.1 kristaps 814: {
1.17 kristaps 815: size_t end, sv;
1.7 kristaps 816: enum texicmd cmd;
817: struct teximacro *macro;
1.1 kristaps 818:
1.14 kristaps 819: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos]))
820: advance(p, pos);
821: if (*pos == BUFSZ(p) || (0 == num && '{' != BUF(p)[*pos]))
1.1 kristaps 822: return(0);
823: if (0 == num)
1.14 kristaps 824: advance(p, pos);
1.1 kristaps 825:
1.14 kristaps 826: while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
827: switch (BUF(p)[*pos]) {
1.1 kristaps 828: case (','):
1.14 kristaps 829: advance(p, pos);
1.1 kristaps 830: return(1);
831: case ('}'):
1.14 kristaps 832: advance(p, pos);
1.1 kristaps 833: return(0);
834: case ('{'):
835: if (0 == p->ign)
836: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 837: advance(p, pos);
1.1 kristaps 838: continue;
839: case ('@'):
840: break;
841: default:
1.14 kristaps 842: parseword(p, pos, ',');
1.1 kristaps 843: continue;
844: }
845:
1.17 kristaps 846: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 847: cmd = texicmd(p, *pos, &end, ¯o);
848: advanceto(p, pos, end);
1.7 kristaps 849: if (NULL != macro)
1.17 kristaps 850: texiexecmacro(p, macro, sv, pos);
1.1 kristaps 851: if (TEXICMD__MAX == cmd)
852: continue;
853: if (NULL != texitoks[cmd].fp)
1.14 kristaps 854: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 855: }
856: return(0);
857: }
858:
859: /*
860: * Parse until the end of a bracketed statement, e.g., @foo{bar baz}.
861: * This will stop in the event of EOF or if we're not at a bracket.
862: */
863: void
1.18 kristaps 864: parsebracket(struct texi *p, size_t *pos, int dostack)
1.1 kristaps 865: {
1.18 kristaps 866: size_t end, sv, stack;
1.7 kristaps 867: enum texicmd cmd;
868: struct teximacro *macro;
1.1 kristaps 869:
1.14 kristaps 870: while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos]))
871: advance(p, pos);
1.1 kristaps 872:
1.14 kristaps 873: if (*pos == BUFSZ(p) || '{' != BUF(p)[*pos])
1.1 kristaps 874: return;
1.14 kristaps 875: advance(p, pos);
1.1 kristaps 876:
1.18 kristaps 877: stack = 0;
1.14 kristaps 878: while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
879: switch (BUF(p)[*pos]) {
1.1 kristaps 880: case ('}'):
1.18 kristaps 881: if (stack > 0) {
882: stack--;
883: advance(p, pos);
884: texiputchar(p, '}');
885: continue;
886: }
1.14 kristaps 887: advance(p, pos);
1.1 kristaps 888: return;
889: case ('{'):
1.18 kristaps 890: if (dostack) {
891: stack++;
892: advance(p, pos);
893: texiputchar(p, '{');
894: continue;
895: }
1.1 kristaps 896: if (0 == p->ign)
897: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 898: advance(p, pos);
1.1 kristaps 899: continue;
900: case ('@'):
901: break;
902: default:
1.14 kristaps 903: parseword(p, pos, '\0');
1.1 kristaps 904: continue;
905: }
906:
1.17 kristaps 907: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 908: cmd = texicmd(p, *pos, &end, ¯o);
909: advanceto(p, pos, end);
1.7 kristaps 910: if (NULL != macro)
1.17 kristaps 911: texiexecmacro(p, macro, sv, pos);
1.1 kristaps 912: if (TEXICMD__MAX == cmd)
913: continue;
914: if (NULL != texitoks[cmd].fp)
1.14 kristaps 915: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 916: }
917: }
918:
919: /*
920: * This should be invoked when we're on a macro line and want to process
921: * to the end of the current input line, doing all of our macros along
922: * the way.
923: */
924: void
1.14 kristaps 925: parseeoln(struct texi *p, size_t *pos)
1.1 kristaps 926: {
1.17 kristaps 927: size_t end, sv;
1.7 kristaps 928: enum texicmd cmd;
929: struct teximacro *macro;
1.1 kristaps 930:
1.14 kristaps 931: while (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos]) {
932: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) {
1.1 kristaps 933: p->seenws = 1;
934: if (p->literal)
1.14 kristaps 935: texiputchar(p, BUF(p)[*pos]);
936: advance(p, pos);
1.1 kristaps 937: }
1.14 kristaps 938: switch (BUF(p)[*pos]) {
1.1 kristaps 939: case ('}'):
940: if (0 == p->ign)
941: texiwarn(p, "unexpected \"}\"");
1.14 kristaps 942: advance(p, pos);
1.1 kristaps 943: continue;
944: case ('{'):
945: if (0 == p->ign)
946: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 947: advance(p, pos);
1.1 kristaps 948: continue;
949: case ('@'):
950: break;
951: default:
1.14 kristaps 952: parseword(p, pos, '\0');
1.1 kristaps 953: continue;
954: }
955:
1.17 kristaps 956: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 957: cmd = texicmd(p, *pos, &end, ¯o);
958: advanceto(p, pos, end);
1.7 kristaps 959: if (NULL != macro)
1.17 kristaps 960: texiexecmacro(p, macro, sv, pos);
1.1 kristaps 961: if (TEXICMD__MAX == cmd)
962: continue;
963: if (NULL != texitoks[cmd].fp)
1.14 kristaps 964: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 965: }
1.14 kristaps 966:
967: if (*pos < BUFSZ(p) && '\n' == BUF(p)[*pos])
968: advance(p, pos);
1.19 kristaps 969: }
970:
971: /*
972: * Peek to see if there's a command after subsequent whitespace.
973: * If so, return the macro identifier.
974: * This DOES NOT work with user-defined macros.
975: */
976: enum texicmd
977: peekcmd(const struct texi *p, size_t pos)
978: {
979: size_t end;
980:
981: while (pos < BUFSZ(p) && ismspace(BUF(p)[pos]))
982: pos++;
983: if (pos == BUFSZ(p) || '@' != BUF(p)[pos])
984: return(TEXICMD__MAX);
985: return(texicmd(p, pos, &end, NULL));
1.1 kristaps 986: }
987:
988: /*
989: * Parse a single word or command.
990: * This will return immediately at the EOF.
991: */
1.14 kristaps 992: static void
993: parsesingle(struct texi *p, size_t *pos)
1.1 kristaps 994: {
1.17 kristaps 995: size_t end, sv;
1.7 kristaps 996: enum texicmd cmd;
997: struct teximacro *macro;
1.1 kristaps 998:
1.14 kristaps 999: if ((*pos = advancenext(p, pos)) >= BUFSZ(p))
1.1 kristaps 1000: return;
1001:
1.14 kristaps 1002: switch (BUF(p)[*pos]) {
1.1 kristaps 1003: case ('}'):
1004: if (0 == p->ign)
1005: texiwarn(p, "unexpected \"}\"");
1.14 kristaps 1006: advance(p, pos);
1.1 kristaps 1007: return;
1008: case ('{'):
1009: if (0 == p->ign)
1010: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1011: advance(p, pos);
1.1 kristaps 1012: return;
1013: case ('@'):
1014: break;
1015: default:
1.14 kristaps 1016: parseword(p, pos, '\0');
1.1 kristaps 1017: return;
1018: }
1019:
1.17 kristaps 1020: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1021: cmd = texicmd(p, *pos, &end, ¯o);
1022: advanceto(p, pos, end);
1.7 kristaps 1023: if (NULL != macro)
1.17 kristaps 1024: texiexecmacro(p, macro, sv, pos);
1.1 kristaps 1025: if (TEXICMD__MAX == cmd)
1026: return;
1027: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1028: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1029: }
1030:
1031: /*
1032: * This is used in the @deffn type of command.
1033: * These have an arbitrary number of line arguments; however, these
1034: * arguments may or may not be surrounded by brackets.
1035: * In this function, we parse each one as either a bracketed or
1036: * non-bracketed argument, returning 0 when we've reached the end of
1037: * line or 1 otherwise.
1038: */
1039: int
1.14 kristaps 1040: parselinearg(struct texi *p, size_t *pos)
1.1 kristaps 1041: {
1042:
1.14 kristaps 1043: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) {
1.1 kristaps 1044: p->seenws = 1;
1.14 kristaps 1045: advance(p, pos);
1.1 kristaps 1046: }
1047:
1.14 kristaps 1048: if (*pos < BUFSZ(p) && '{' == BUF(p)[*pos])
1.18 kristaps 1049: parsebracket(p, pos, 0);
1.14 kristaps 1050: else if (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos])
1051: parsesingle(p, pos);
1.1 kristaps 1052: else
1053: return(0);
1054:
1055: return(1);
1056: }
1057:
/*
 * Parse from the start of the current buffer til its end, one word or
 * command at a time.
 */
static void
parseeof(struct texi *p)
{
	size_t	 pos = 0;

	while (pos < BUFSZ(p))
		parsesingle(p, &pos);
}
1069:
1.8 kristaps 1070: void
1.21 kristaps 1071: texisplice(struct texi *p, const char *buf, size_t sz, size_t pos)
1.8 kristaps 1072: {
1.14 kristaps 1073: char *cp;
1074: struct texifile *f;
1.8 kristaps 1075:
1.14 kristaps 1076: assert(p->filepos > 0);
1077: f = &p->files[p->filepos - 1];
1.8 kristaps 1078:
1.14 kristaps 1079: if (f->mapsz + sz > f->mapmaxsz) {
1080: f->mapmaxsz = f->mapsz + sz + 1024;
1081: cp = realloc(f->map, f->mapmaxsz);
1082: if (NULL == cp)
1083: texiabort(p, NULL);
1084: f->map = cp;
1085: }
1.8 kristaps 1086:
1.15 kristaps 1087: f->insplice += sz;
1.21 kristaps 1088: memmove(f->map + pos + sz, f->map + pos, f->mapsz - pos);
1089: memcpy(f->map + pos, buf, sz);
1.14 kristaps 1090: f->mapsz += sz;
1.8 kristaps 1091: }
1092:
1093: /*
1.1 kristaps 1094: * Parse a block sequence until we have the "@end endtoken" command
1095: * invocation.
1096: * This will return immediately at EOF.
1097: */
1098: void
1.14 kristaps 1099: parseto(struct texi *p, size_t *pos, const char *endtoken)
1.1 kristaps 1100: {
1.17 kristaps 1101: size_t end, sv;
1.7 kristaps 1102: enum texicmd cmd;
1103: size_t endtoksz;
1104: struct teximacro *macro;
1.1 kristaps 1105:
1106: endtoksz = strlen(endtoken);
1107: assert(endtoksz > 0);
1108:
1.14 kristaps 1109: while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
1110: switch (BUF(p)[*pos]) {
1.1 kristaps 1111: case ('}'):
1112: if (0 == p->ign)
1113: texiwarn(p, "unexpected \"}\"");
1.14 kristaps 1114: advance(p, pos);
1.1 kristaps 1115: continue;
1116: case ('{'):
1117: if (0 == p->ign)
1118: texiwarn(p, "unexpected \"{\"");
1.14 kristaps 1119: advance(p, pos);
1.1 kristaps 1120: continue;
1121: case ('@'):
1122: break;
1123: default:
1.14 kristaps 1124: parseword(p, pos, '\0');
1.1 kristaps 1125: continue;
1126: }
1127:
1.17 kristaps 1128: sv = p->files[p->filepos - 1].insplice;
1.14 kristaps 1129: cmd = texicmd(p, *pos, &end, ¯o);
1130: advanceto(p, pos, end);
1.1 kristaps 1131: if (TEXICMD_END == cmd) {
1.14 kristaps 1132: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1133: advance(p, pos);
1.1 kristaps 1134: /*
1135: * FIXME: check the full word, not just its
1136: * initial substring!
1137: */
1.14 kristaps 1138: if (BUFSZ(p) - *pos >= endtoksz && 0 == strncmp
1139: (&BUF(p)[*pos], endtoken, endtoksz)) {
1140: advanceeoln(p, pos, 0);
1.1 kristaps 1141: break;
1142: }
1143: if (0 == p->ign)
1144: texiwarn(p, "unexpected \"end\"");
1.14 kristaps 1145: advanceeoln(p, pos, 0);
1.1 kristaps 1146: continue;
1.7 kristaps 1147: }
1148: if (NULL != macro)
1.17 kristaps 1149: texiexecmacro(p, macro, sv, pos);
1.7 kristaps 1150: if (TEXICMD__MAX == cmd)
1151: continue;
1152: if (NULL != texitoks[cmd].fp)
1.14 kristaps 1153: (*texitoks[cmd].fp)(p, cmd, pos);
1.1 kristaps 1154: }
1155: }
1156:
1157: /*
1.12 kristaps 1158: * Like parsefile() but used for reading from standard input.
1159: * This can only be called for the first file!
1160: */
1161: void
1162: parsestdin(struct texi *p)
1163: {
1164: struct texifile *f;
1165: ssize_t ssz;
1166:
1167: assert(0 == p->filepos);
1168: f = &p->files[p->filepos];
1169: memset(f, 0, sizeof(struct texifile));
1170:
1171: f->type = TEXISRC_STDIN;
1172: f->name = "<stdin>";
1173:
1.14 kristaps 1174: for (f->mapsz = 0; ; f->mapsz += (size_t)ssz) {
1175: if (f->mapsz == f->mapmaxsz) {
1176: if (f->mapmaxsz == (1U << 31))
1.12 kristaps 1177: texierr(p, "stdin buffer too long");
1.14 kristaps 1178: f->mapmaxsz = f->mapmaxsz > 65536 / 2 ?
1179: 2 * f->mapmaxsz : 65536;
1180: f->map = realloc(f->map, f->mapmaxsz);
1.12 kristaps 1181: if (NULL == f->map)
1182: texiabort(p, NULL);
1183: }
1.14 kristaps 1184: ssz = read(STDIN_FILENO, f->map +
1185: (int)f->mapsz, f->mapmaxsz - f->mapsz);
1.12 kristaps 1186: if (0 == ssz)
1187: break;
1188: else if (-1 == ssz)
1189: texiabort(p, NULL);
1190: }
1191:
1192: p->filepos++;
1.14 kristaps 1193: parseeof(p);
1.12 kristaps 1194: texifilepop(p);
1195: }
1196:
1197: /*
1.1 kristaps 1198: * Memory-map the file "fname" and begin parsing it unless "parse" is
1199: * zero, in which case we just dump the file to stdout (making sure it
1200: * doesn't trip up mdoc(7) along the way).
1201: * This can be called in a nested context.
1202: */
1203: void
1204: parsefile(struct texi *p, const char *fname, int parse)
1205: {
1206: struct texifile *f;
1207: int fd;
1208: struct stat st;
1209: size_t i;
1.14 kristaps 1210: char *map;
1.1 kristaps 1211:
1.5 kristaps 1212: if (64 == p->filepos)
1.6 kristaps 1213: texierr(p, "too many open files");
1.1 kristaps 1214: f = &p->files[p->filepos];
1215: memset(f, 0, sizeof(struct texifile));
1216:
1.12 kristaps 1217: f->type = TEXISRC_FILE;
1.1 kristaps 1218: f->name = fname;
1219: if (-1 == (fd = open(fname, O_RDONLY, 0))) {
1220: texiabort(p, fname);
1221: } else if (-1 == fstat(fd, &st)) {
1222: close(fd);
1223: texiabort(p, fname);
1224: }
1225:
1.14 kristaps 1226: f->mapsz = f->mapmaxsz = st.st_size;
1227: map = mmap(NULL, f->mapsz,
1.1 kristaps 1228: PROT_READ, MAP_SHARED, fd, 0);
1229: close(fd);
1230:
1.14 kristaps 1231: if (MAP_FAILED == map)
1.1 kristaps 1232: texiabort(p, fname);
1233:
1234: if ( ! parse) {
1.13 kristaps 1235: for (i = 0; i < f->mapsz; i++)
1.14 kristaps 1236: texiputchar(p, map[i]);
1.13 kristaps 1237: if (p->outcol)
1238: texiputchar(p, '\n');
1.14 kristaps 1239: munmap(map, f->mapsz);
1240: return;
1241: }
1242:
1243: p->filepos++;
1244: f->map = malloc(f->mapsz);
1245: memcpy(f->map, map, f->mapsz);
1246: munmap(map, f->mapsz);
1247: parseeof(p);
1.1 kristaps 1248: texifilepop(p);
1249: }
1250:
1.2 kristaps 1251: /*
1252: * Look up the value to a stored pair's value starting in "buf" from
1253: * start to end.
1254: * Return the pointer to the value memory, which can be NULL if the
1255: * pointer key does not exist.
1256: * The pointer can point to NULL if the value has been unset.
1257: */
1258: static char **
1.14 kristaps 1259: valuequery(const struct texi *p, size_t start, size_t end)
1.2 kristaps 1260: {
1261: size_t i, sz, len;
1262:
1263: assert(end >= start);
1264: /* Ignore zero-length. */
1265: if (0 == (len = (end - start)))
1266: return(NULL);
1267: for (i = 0; i < p->valsz; i++) {
1268: sz = strlen(p->vals[i].key);
1269: if (sz != len)
1270: continue;
1.14 kristaps 1271: if (0 == strncmp(p->vals[i].key, &BUF(p)[start], len))
1.2 kristaps 1272: return(&p->vals[i].value);
1273: }
1274: return(NULL);
1275: }
1276:
1277: /*
1278: * Parse a key until the end of line, e.g., @clear foo\n, and return the
1279: * pointer to its value via valuequery().
1280: */
1281: static char **
1.14 kristaps 1282: valuelquery(struct texi *p, size_t *pos)
1.2 kristaps 1283: {
1284: size_t start, end;
1285: char **ret;
1286:
1.14 kristaps 1287: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1288: advance(p, pos);
1289: if (*pos == BUFSZ(p))
1.2 kristaps 1290: return(NULL);
1.14 kristaps 1291: for (start = end = *pos; end < BUFSZ(p); end++)
1292: if ('\n' == BUF(p)[end])
1.2 kristaps 1293: break;
1.14 kristaps 1294: advanceto(p, pos, end);
1295: if (*pos < BUFSZ(p)) {
1296: assert('\n' == BUF(p)[*pos]);
1297: advance(p, pos);
1.2 kristaps 1298: }
1.14 kristaps 1299: if (NULL == (ret = valuequery(p, start, end)))
1.2 kristaps 1300: return(NULL);
1301: return(ret);
1302: }
1303:
/*
 * Parse a key until the end of line and unset its value: the stored
 * value is freed and its slot set to NULL.
 * Does nothing if the key is not found.
 */
void
valuelclear(struct texi *p, size_t *pos)
{
	char	**slot;

	slot = valuelquery(p, pos);
	if (NULL != slot) {
		free(*slot);
		*slot = NULL;
	}
}
1314:
/*
 * Parse a key until the end of line and return its stored value.
 * Returns NULL if the key is not found or its value has been unset.
 */
const char *
valuellookup(struct texi *p, size_t *pos)
{
	char	**slot = valuelquery(p, pos);

	return(NULL == slot ? NULL : *slot);
}
1324:
1325: /*
1326: * Parse a key from a bracketed string, e.g., @value{foo}, and return
1327: * the pointer to its value.
1328: * If the returned pointer is NULL, either there was no string within
1329: * the brackets (or no brackets), or the value was not found, or the
1330: * value had previously been unset.
1331: */
1332: const char *
1.14 kristaps 1333: valueblookup(struct texi *p, size_t *pos)
1.2 kristaps 1334: {
1335: size_t start, end;
1336: char **ret;
1337:
1.14 kristaps 1338: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1339: advance(p, pos);
1340: if (*pos == BUFSZ(p) || '{' != BUF(p)[*pos])
1.2 kristaps 1341: return(NULL);
1.14 kristaps 1342: advance(p, pos);
1343: for (start = end = *pos; end < BUFSZ(p); end++)
1344: if ('}' == BUF(p)[end])
1.2 kristaps 1345: break;
1.14 kristaps 1346: advanceto(p, pos, end);
1347: if (*pos < BUFSZ(p)) {
1348: assert('}' == BUF(p)[*pos]);
1349: advance(p, pos);
1.2 kristaps 1350: }
1.14 kristaps 1351: if (NULL == (ret = valuequery(p, start, end)))
1.2 kristaps 1352: return(NULL);
1353: return(*ret);
1354: }
1355:
1356: void
1357: valueadd(struct texi *p, char *key, char *val)
1358: {
1359: size_t i;
1360:
1361: assert(NULL != key);
1362: assert(NULL != val);
1363:
1364: for (i = 0; i < p->valsz; i++)
1365: if (0 == strcmp(p->vals[i].key, key))
1366: break;
1367:
1368: if (i < p->valsz) {
1369: free(key);
1370: free(p->vals[i].value);
1371: p->vals[i].value = val;
1372: } else {
1.4 kristaps 1373: /* FIXME: reallocarray() */
1.2 kristaps 1374: p->vals = realloc(p->vals,
1375: (p->valsz + 1) *
1376: sizeof(struct texivalue));
1.4 kristaps 1377: if (NULL == p->vals)
1378: texiabort(p, NULL);
1.2 kristaps 1379: p->vals[p->valsz].key = key;
1380: p->vals[p->valsz].value = val;
1381: p->valsz++;
1382: }
1.7 kristaps 1383: }
1384:
1385: /*
1386: * Take the arguments to a macro, e.g., @foo{bar, baz, xyzzy} (or the
1387: * declaration form, @macro foo {arg1, ...}) and textually convert it to
1388: * an array of arguments of size "argsz".
1389: * These need to be freed individually and as a whole.
1390: * NOTE: this will puke on @, or @} macros, which can trick it into
1391: * stopping argument parsing earlier.
1392: * Ergo, textual: this doesn't interpret the arguments in any way.
1393: */
1394: char **
1.14 kristaps 1395: argparse(struct texi *p, size_t *pos, size_t *argsz, size_t hint)
1.7 kristaps 1396: {
1397: char **args;
1398: size_t start, end, stack;
1399:
1.14 kristaps 1400: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1401: advance(p, pos);
1.7 kristaps 1402:
1403: args = NULL;
1404: *argsz = 0;
1405:
1.17 kristaps 1406: if (*pos == BUFSZ(p))
1407: return(args);
1408:
1.14 kristaps 1409: if ('{' != BUF(p)[*pos] && hint) {
1.10 kristaps 1410: /*
1411: * Special case: if we encounter an unbracketed argument
1412: * and we're being invoked with non-zero arguments
1413: * (versus being set, i.e., hint>0), then parse until
1414: * the end of line.
1415: */
1416: *argsz = 1;
1417: args = calloc(1, sizeof(char *));
1418: if (NULL == args)
1419: texiabort(p, NULL);
1420: start = *pos;
1.14 kristaps 1421: while (*pos < BUFSZ(p)) {
1422: if ('\n' == BUF(p)[*pos])
1.10 kristaps 1423: break;
1.14 kristaps 1424: advance(p, pos);
1.10 kristaps 1425: }
1426: args[0] = malloc(*pos - start + 1);
1.14 kristaps 1427: memcpy(args[0], &BUF(p)[start], *pos - start);
1.10 kristaps 1428: args[0][*pos - start] = '\0';
1.14 kristaps 1429: if (*pos < BUFSZ(p) && '\n' == BUF(p)[*pos])
1430: advance(p, pos);
1.10 kristaps 1431: return(args);
1.14 kristaps 1432: } else if ('{' != BUF(p)[*pos])
1.7 kristaps 1433: return(args);
1.17 kristaps 1434:
1435: assert('{' == BUF(p)[*pos]);
1.7 kristaps 1436:
1437: /* Parse til the closing '}', putting into the array. */
1.14 kristaps 1438: advance(p, pos);
1439: while (*pos < BUFSZ(p)) {
1440: while (*pos < BUFSZ(p) && isws(BUF(p)[*pos]))
1441: advance(p, pos);
1.7 kristaps 1442: start = *pos;
1443: stack = 0;
1.14 kristaps 1444: while (*pos < BUFSZ(p)) {
1.7 kristaps 1445: /*
1446: * According to the manual, commas within
1447: * embedded commands are escaped.
1448: * We keep track of embedded-ness in the "stack"
1449: * state anyway, so this is free.
1450: */
1.14 kristaps 1451: if (',' == BUF(p)[*pos] && 0 == stack && 1 != hint)
1.7 kristaps 1452: break;
1.14 kristaps 1453: else if (0 == stack && '}' == BUF(p)[*pos])
1.7 kristaps 1454: break;
1.14 kristaps 1455: else if (0 != stack && '}' == BUF(p)[*pos])
1.7 kristaps 1456: stack--;
1.14 kristaps 1457: else if ('{' == BUF(p)[*pos])
1.7 kristaps 1458: stack++;
1.14 kristaps 1459: advance(p, pos);
1.7 kristaps 1460: }
1461: if (stack)
1462: texiwarn(p, "unterminated macro "
1463: "in macro arguments");
1.14 kristaps 1464: if ((end = *pos) == BUFSZ(p))
1.7 kristaps 1465: break;
1466: /* Test for zero-length '{ }'. */
1.14 kristaps 1467: if (start == end && '}' == BUF(p)[*pos] && 0 == *argsz)
1.7 kristaps 1468: break;
1469: /* FIXME: use reallocarray. */
1470: args = realloc
1471: (args, sizeof(char *) *
1472: (*argsz + 1));
1473: if (NULL == args)
1474: texiabort(p, NULL);
1475: args[*argsz] = malloc(end - start + 1);
1476: if (NULL == args[*argsz])
1477: texiabort(p, NULL);
1478: memcpy(args[*argsz],
1.14 kristaps 1479: &BUF(p)[start], end - start);
1.7 kristaps 1480: args[*argsz][end - start] = '\0';
1481: (*argsz)++;
1.14 kristaps 1482: if ('}' == BUF(p)[*pos])
1.7 kristaps 1483: break;
1.14 kristaps 1484: advance(p, pos);
1.7 kristaps 1485: }
1486:
1.14 kristaps 1487: if (*pos == BUFSZ(p))
1.7 kristaps 1488: texierr(p, "unterminated arguments");
1.14 kristaps 1489: assert('}' == BUF(p)[*pos]);
1490: advance(p, pos);
1.7 kristaps 1491: return(args);
1.2 kristaps 1492: }
1.20 kristaps 1493:
1494: /*
1495: * If we're printing chapters, then do some navigation here and then
1496: * close our outfile.
1497: * I want to call this the SEE ALSO section, but that's not really what
1498: * it is: we'll refer to the "initial" (top) node and the next and
1499: * previous chapters.
1500: */
1501: void
1502: teximdocclose(struct texi *p, int last)
1503: {
1504: char buf[PATH_MAX];
1505:
1506: if (NULL == p->chapters || 0 == p->chapnum)
1507: return;
1508:
1509: teximacro(p, "Sh INFO NAVIGATION");
1510:
1511: /* Print a reference to the "top" node. */
1512: if (p->chapnum > 1) {
1.22 kristaps 1513: texiputchars(p, "Top node,");
1.20 kristaps 1514: snprintf(buf, sizeof(buf), "node1 7");
1515: teximacroopen(p, "Xr ");
1516: texiputchars(p, buf);
1.22 kristaps 1517: texiputchars(p, " ;");
1.20 kristaps 1518: teximacroclose(p);
1519: }
1520:
1521: /* Print a reference to the previous node. */
1522: if (p->chapnum > 2) {
1.22 kristaps 1523: texiputchars(p, "previous node,");
1.20 kristaps 1524: snprintf(buf, sizeof(buf),
1525: "node%zu 7", p->chapnum - 1);
1526: teximacroopen(p, "Xr ");
1527: texiputchars(p, buf);
1528: if ( ! last)
1.22 kristaps 1529: texiputchars(p, " ;");
1.20 kristaps 1530: teximacroclose(p);
1531: }
1532:
1533: /* Print a reference to the next node. */
1534: if ( ! last) {
1.22 kristaps 1535: if (1 == p->chapnum)
1536: texiputchars(p, "Next node,");
1537: else
1538: texiputchars(p, "next node,");
1.20 kristaps 1539: snprintf(buf, sizeof(buf),
1540: "node%zu 7", p->chapnum + 1);
1541: teximacroopen(p, "Xr ");
1542: texiputchars(p, buf);
1543: teximacroclose(p);
1544: }
1545:
1546: fclose(p->outfile);
1547: }
1548:
1549: /*
1550: * Open a mdoc(7) context.
1551: * If we're printing chapters, then open the outfile here, too.
1552: * Otherwise just print the mdoc(7) prologue.
1553: */
1554: void
1.21 kristaps 1555: teximdocopen(struct texi *p, size_t *pos)
1.20 kristaps 1556: {
1557: const char *cp;
1558: time_t t;
1559: char date[32];
1560: char fname[PATH_MAX];
1561:
1562: if (NULL != p->chapters) {
1563: snprintf(fname, sizeof(fname), "%s/node%zu.7",
1564: p->chapters, ++p->chapnum);
1565: p->outfile = fopen(fname, "w");
1566: if (NULL == p->outfile)
1567: texiabort(p, fname);
1568: }
1569:
1570: /*
1571: * Here we print our standard mdoc(7) prologue.
1572: * We use the title set with @settitle for the `Nd' description
1573: * and the source document filename (the first one as invoked on
1574: * the command line) for the title.
1575: * The date is set to the current date.
1576: */
1577: t = time(NULL);
1578: strftime(date, sizeof(date), "%F", localtime(&t));
1579:
1580: teximacroopen(p, "Dd");
1581: texiputchars(p, date);
1582: teximacroclose(p);
1583: teximacroopen(p, "Dt");
1584: for (cp = p->title; '\0' != *cp; cp++)
1585: texiputchar(p, toupper((unsigned int)*cp));
1586: texiputchars(p, " 7");
1587: teximacroclose(p);
1588: teximacro(p, "Os");
1589: teximacro(p, "Sh NAME");
1590: teximacroopen(p, "Nm");
1591: for (cp = p->title; '\0' != *cp; cp++)
1592: texiputchar(p, *cp);
1593: teximacroclose(p);
1594: teximacroopen(p, "Nd");
1.21 kristaps 1595: /*
1596: * The subtitle `Nd' can consist of arbitrary macros, so paste
1597: * it and parse to the end of the line.
1598: */
1599: if (NULL != p->subtitle) {
1600: texisplice(p, p->subtitle, strlen(p->subtitle), *pos);
1601: parseeoln(p, pos);
1602: } else
1.20 kristaps 1603: texiputchars(p, "Unknown description");
1604: teximacroclose(p);
1605: }
1606:
CVSweb