Annotation of texi2mdoc/util.c, Revision 1.11
1.11 ! kristaps 1: /* $Id: util.c,v 1.10 2015/02/23 15:09:09 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2015 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/mman.h>
18: #include <sys/stat.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <libgen.h>
25: #include <limits.h>
26: #include <stdarg.h>
27: #include <stdio.h>
28: #include <stdlib.h>
29: #include <string.h>
30: #include <time.h>
31: #include <unistd.h>
32:
33: #include "extern.h"
34:
35: /*
36: * Unmap the top-most file in the stack of files currently opened (that
37: * is, nested calls to parsefile()).
38: */
39: void
40: texifilepop(struct texi *p)
41: {
42: struct texifile *f;
43:
44: assert(p->filepos > 0);
45: f = &p->files[--p->filepos];
46: munmap(f->map, f->mapsz);
47: }
48:
1.7 kristaps 49: static void
50: teximacrofree(struct teximacro *p)
51: {
52: size_t i;
53:
54: for (i = 0; i < p->argsz; i++)
55: free(p->args[i]);
56:
57: free(p->args);
58: free(p->key);
59: free(p->value);
60: }
61:
62: static void
63: texivaluefree(struct texivalue *p)
64: {
65:
66: free(p->key);
67: free(p->value);
68: }
69:
1.1 kristaps 70: /*
71: * Unmap all files that we're currently using and free all resources
72: * that we've allocated during the parse.
73: * The utility should exit(...) after this is called.
74: */
75: void
76: texiexit(struct texi *p)
77: {
78: size_t i;
79:
80: /* Make sure we're newline-terminated. */
81: if (p->outcol)
82: putchar('\n');
83:
84: /* Unmap all files. */
85: while (p->filepos > 0)
86: texifilepop(p);
87:
1.7 kristaps 88: for (i = 0; i < p->macrosz; i++)
89: teximacrofree(&p->macros[i]);
1.1 kristaps 90: for (i = 0; i < p->dirsz; i++)
91: free(p->dirs[i]);
1.4 kristaps 92: for (i = 0; i < p->indexsz; i++)
93: free(p->indexs[i]);
1.7 kristaps 94: for (i = 0; i < p->valsz; i++)
95: texivaluefree(&p->vals[i]);
1.4 kristaps 96:
1.7 kristaps 97: free(p->macros);
1.1 kristaps 98: free(p->vals);
1.4 kristaps 99: free(p->indexs);
1.1 kristaps 100: free(p->dirs);
101: free(p->subtitle);
102: free(p->title);
103: }
104:
/*
 * Die on a system error.
 * "errstring" is handed to perror(3) as-is (it may be NULL); the parse
 * state is then released with texiexit() and the process exits with
 * EXIT_FAILURE.
 * This function does not return.
 */
void
texiabort(struct texi *p, const char *errstring)
{
	/* Report first: the cleanup below may disturb errno. */
	perror(errstring);

	texiexit(p);
	exit(EXIT_FAILURE);
}
117:
118: /*
119: * Print a generic warning message (to stderr) tied to our current
120: * location in the parse sequence.
121: */
122: void
123: texiwarn(const struct texi *p, const char *fmt, ...)
124: {
125: va_list ap;
126:
127: fprintf(stderr, "%s:%zu:%zu: warning: ",
128: p->files[p->filepos - 1].name,
129: p->files[p->filepos - 1].line + 1,
130: p->files[p->filepos - 1].col + 1);
131: va_start(ap, fmt);
132: vfprintf(stderr, fmt, ap);
133: va_end(ap);
134: fputc('\n', stderr);
135: }
136:
137: /*
138: * Print an error message (to stderr) tied to our current location in
139: * the parse sequence, invoke texiexit(), then die.
140: */
141: void
142: texierr(struct texi *p, const char *fmt, ...)
143: {
144: va_list ap;
145:
146: fprintf(stderr, "%s:%zu:%zu: error: ",
147: p->files[p->filepos - 1].name,
148: p->files[p->filepos - 1].line + 1,
149: p->files[p->filepos - 1].col + 1);
150: va_start(ap, fmt);
151: vfprintf(stderr, fmt, ap);
152: va_end(ap);
153: fputc('\n', stderr);
154: texiexit(p);
155: exit(EXIT_FAILURE);
156: }
157:
158: /*
159: * Put a single data character to the output if we're not ignoring.
160: * Makes sure we don't spurriously start a macro.
161: * Adjusts our output status.
162: * This shouldn't be called for macros: just for ordinary text.
163: */
164: void
165: texiputchar(struct texi *p, char c)
166: {
167:
168: if (p->ign)
169: return;
170:
171: if ('.' == c && 0 == p->outcol)
172: fputs("\\&", stdout);
1.10 kristaps 173: if ('\'' == c && 0 == p->outcol)
174: fputs("\\&", stdout);
1.1 kristaps 175:
176: putchar(c);
177: p->seenvs = 0;
178: if ('\n' == c) {
179: p->outcol = 0;
180: p->seenws = 0;
181: } else
182: p->outcol++;
183: }
184:
/*
 * Write a nil-terminated string of ordinary text, one character at a
 * time, through texiputchar().
 * Not to be used for macros: only for ordinary text.
 */
void
texiputchars(struct texi *p, const char *s)
{
	const char	*cp;

	for (cp = s; '\0' != *cp; cp++)
		texiputchar(p, *cp);
}
196:
/*
 * Write the bytes buf[start] up to but not including buf[end] through
 * texiputchar(), following every backslash with an 'e' so that it is
 * emitted as the mdoc(7) "\e" escape.
 */
void
texiputbuf(struct texi *p, const char *buf, size_t start, size_t end)
{
	size_t	 i;

	for (i = start; i < end; i++) {
		texiputchar(p, buf[i]);
		if ('\\' != buf[i])
			continue;
		texiputchar(p, 'e');
	}
}
211:
212: /*
213: * Close an mdoc(7) macro opened with teximacroopen().
214: * If there are no more macros on the line, prints a newline.
215: */
216: void
217: teximacroclose(struct texi *p)
218: {
219:
220: if (p->ign)
221: return;
222:
223: if (0 == --p->outmacro) {
224: putchar('\n');
225: p->outcol = p->seenws = 0;
226: }
227: }
228:
229: /*
230: * Open a mdoc(7) macro.
231: * This is used for line macros, e.g., Qq [foo bar baz].
232: * It can be invoked for nested macros, e.g., Qq Li foo .
233: * TODO: flush-right punctuation (e.g., parenthesis).
234: */
235: void
236: teximacroopen(struct texi *p, const char *s)
237: {
238: int rc;
239:
240: if (p->ign)
241: return;
242:
243: if (p->outcol && 0 == p->outmacro) {
244: putchar('\n');
245: p->outcol = 0;
246: }
247:
248: if (0 == p->outmacro)
249: putchar('.');
250: else
251: putchar(' ');
252:
253: if (EOF != (rc = fputs(s, stdout)))
254: p->outcol += rc;
255:
256: putchar(' ');
257: p->outcol++;
258: p->outmacro++;
259: p->seenws = 0;
260: }
261:
262: /*
263: * Put a stadnalone mdoc(7) command with the trailing newline.
264: */
265: void
266: teximacro(struct texi *p, const char *s)
267: {
268:
269: if (p->ign)
270: return;
271:
272: if (p->outmacro)
273: texierr(p, "\"%s\" in open line scope!?", s);
274: if (p->literal)
275: texierr(p, "\"%s\" in a literal scope!?", s);
276:
277: if (p->outcol)
278: putchar('\n');
279:
280: putchar('.');
281: puts(s);
282: p->outcol = p->seenws = 0;
283: }
284:
285: /*
286: * Introduce vertical space during normal (non-macro) input.
287: */
288: void
289: texivspace(struct texi *p)
290: {
291:
1.5 kristaps 292: if (p->seenvs || TEXILIST_TABLE == p->list)
1.1 kristaps 293: return;
294: teximacro(p, "Pp");
295: p->seenvs = 1;
296: }
297:
298: /*
299: * Advance by a single byte in the input stream, adjusting our location
300: * in the current input file.
301: */
302: void
303: advance(struct texi *p, const char *buf, size_t *pos)
304: {
305:
306: if ('\n' == buf[*pos]) {
307: p->files[p->filepos - 1].line++;
308: p->files[p->filepos - 1].col = 0;
309: } else
310: p->files[p->filepos - 1].col++;
311:
312: (*pos)++;
313: }
314:
315: /*
316: * It's common to wait punctuation to float on the right side of macro
317: * lines in mdoc(7), e.g., ".Em hello ) ."
318: * This function does so, and should be called before teximacroclose().
319: * It will detect that it's the last in the nested macros and
320: * appropriately flush-left punctuation alongside the macro.
321: */
322: void
323: texipunctuate(struct texi *p, const char *buf, size_t sz, size_t *pos)
324: {
325: size_t start, end;
326:
327: if (1 != p->outmacro)
328: return;
329:
330: for (start = end = *pos; end < sz; end++) {
331: switch (buf[end]) {
332: case (','):
333: case (')'):
334: case ('.'):
335: case ('"'):
336: case (':'):
337: case ('!'):
338: case ('?'):
339: continue;
340: default:
341: break;
342: }
343: break;
344: }
345: if (end == *pos)
346: return;
347: if (end + 1 == sz || ' ' == buf[end] || '\n' == buf[end]) {
348: for ( ; start < end; start++) {
349: texiputchar(p, ' ');
350: texiputchar(p, buf[start]);
351: advance(p, buf, pos);
352: }
353: }
354: }
355:
356: /*
357: * Advance to the next non-whitespace word in the input stream.
358: * If we're in literal mode, then print all of the whitespace as we're
359: * doing so.
360: */
361: static size_t
362: advancenext(struct texi *p, const char *buf, size_t sz, size_t *pos)
363: {
364:
365: if (p->literal) {
366: while (*pos < sz && ismspace(buf[*pos])) {
367: if (*pos && '\n' == buf[*pos] &&
368: '\\' == buf[*pos - 1])
369: texiputchar(p, 'e');
370: texiputchar(p, buf[*pos]);
371: advance(p, buf, pos);
372: }
373: return(*pos);
374: }
375:
376: while (*pos < sz && ismspace(buf[*pos])) {
377: p->seenws = 1;
378: /*
379: * If it looks like we've printed a double-line, then
380: * output a paragraph.
381: * FIXME: this is stupid.
382: */
383: if (*pos && '\n' == buf[*pos] && '\n' == buf[*pos - 1])
384: texivspace(p);
385: advance(p, buf, pos);
386: }
387: return(*pos);
388: }
389:
/*
 * Move *pos forward to the next newline (or to the end of the buffer).
 * If "consumenl" is non-zero and a newline was found, step over it as
 * well.
 * Returns the new position.
 * NOTE: THIS SHOULD NOT BE CALLED ON BLANK TEXT, as it will read up to
 * the @\n.
 */
size_t
advanceeoln(struct texi *p, const char *buf,
	size_t sz, size_t *pos, int consumenl)
{
	for ( ; *pos < sz; advance(p, buf, pos))
		if ('\n' == buf[*pos])
			break;

	if (consumenl && *pos < sz)
		advance(p, buf, pos);

	return(*pos);
}
406:
/*
 * Step byte-by-byte (so that line and column stay accurate) until *pos
 * reaches the absolute buffer position "end".
 * "end" must not lie before the current position.
 */
void
advanceto(struct texi *p, const char *buf, size_t *pos, size_t end)
{
	assert(end >= *pos);

	if (*pos >= end)
		return;
	do
		advance(p, buf, pos);
	while (*pos < end);
}
419:
1.7 kristaps 420: static void
421: texiexecmacro(struct texi *p, struct teximacro *m,
422: const char *buf, size_t sz, size_t *pos)
423: {
1.11 ! kristaps 424: size_t valsz, realsz, aasz, asz,
! 425: ssz, i, j, k, start, end;
! 426: char *val;
! 427: char **args;
! 428: const char *cp;
1.7 kristaps 429:
1.8 kristaps 430: args = argparse(p, buf, sz, pos, &asz, m->argsz);
1.7 kristaps 431: if (asz != m->argsz)
432: texiwarn(p, "invalid macro argument length");
433: aasz = asz < m->argsz ? asz : m->argsz;
434:
435: if (0 == aasz) {
1.8 kristaps 436: parsemembuf(p, m->value, strlen(m->value));
1.7 kristaps 437: return;
438: }
439:
440: valsz = realsz = strlen(m->value);
441: val = strdup(m->value);
442:
443: for (i = j = 0; i < realsz; i++) {
444: /* Parse blindly til the backslash delimiter. */
445: if ('\\' != m->value[i]) {
446: val[j++] = m->value[i];
447: val[j] = '\0';
448: continue;
449: } else if (i == realsz - 1)
450: texierr(p, "trailing argument name delimiter");
451:
452: /* Double-backslash is escaped. */
453: if ('\\' == m->value[i + 1]) {
454: val[j++] = m->value[i++];
455: val[j] = '\0';
456: continue;
457: }
458:
459: assert('\\' == m->value[i] && i < realsz - 1);
460:
461: /* Parse to terminating delimiter. */
462: /* FIXME: embedded, escaped delimiters? */
463: for (start = end = i + 1; end < realsz; end++)
464: if ('\\' == m->value[end])
465: break;
466: if (end == realsz)
467: texierr(p, "unterminated argument name");
468:
469: for (k = 0; k < aasz; k++) {
470: if ((ssz = strlen(m->args[k])) != (end - start))
471: continue;
472: if (strncmp(&m->value[start], m->args[k], ssz))
473: continue;
474: break;
475: }
476:
477: /*
478: * Argument didn't exist in argument table.
479: * No need to reallocate here: we just copy the text
480: * directly from the macro value into the buffer.
481: */
482: if (k == aasz) {
483: for ( ; i < end; i++)
484: val[j++] = m->value[i];
485: assert('\\' == m->value[i]);
486: val[j++] = m->value[i];
487: val[j] = '\0';
488: continue;
489: }
490:
491: if (strlen(args[k]) > ssz) {
492: valsz += strlen(args[k]);
493: val = realloc(val, valsz + 1);
494: if (NULL == val)
495: texiabort(p, NULL);
496: }
497:
1.11 ! kristaps 498: for (cp = args[k]; '\0' != *cp; cp++)
! 499: val[j++] = *cp;
! 500:
! 501: val[j] = '\0';
1.7 kristaps 502: i = end;
503: }
504:
1.8 kristaps 505: parsemembuf(p, val, strlen(val));
1.7 kristaps 506:
507: for (i = 0; i < asz; i++)
508: free(args[i]);
509: free(args);
510: free(val);
511: }
512:
1.1 kristaps 513: /*
514: * Output a free-form word in the input stream, progressing to the next
515: * command or white-space.
516: * This also will advance the input stream.
517: */
518: static void
519: texiword(struct texi *p, const char *buf,
520: size_t sz, size_t *pos, char extra)
521: {
522:
523: if (p->seenws && 0 == p->outmacro &&
524: p->outcol > 72 && 0 == p->literal)
525: texiputchar(p, '\n');
526: /* FIXME: abstract this: we use it elsewhere. */
527: if (p->seenws && p->outcol && 0 == p->literal)
528: texiputchar(p, ' ');
529:
530: p->seenws = 0;
531:
532: while (*pos < sz && ! ismspace(buf[*pos])) {
533: switch (buf[*pos]) {
534: case ('@'):
535: case ('}'):
536: case ('{'):
537: return;
538: }
539: if ('\0' != extra && buf[*pos] == extra)
540: return;
541: if (*pos < sz - 1 &&
542: '`' == buf[*pos] &&
543: '`' == buf[*pos + 1]) {
544: texiputchars(p, "\\(lq");
545: advance(p, buf, pos);
546: } else if (*pos < sz - 1 &&
547: '\'' == buf[*pos] &&
548: '\'' == buf[*pos + 1]) {
549: texiputchars(p, "\\(rq");
550: advance(p, buf, pos);
1.10 kristaps 551: } else if ('\\' == buf[*pos]) {
552: texiputchar(p, buf[*pos]);
553: texiputchar(p, 'e');
1.1 kristaps 554: } else
555: texiputchar(p, buf[*pos]);
556: advance(p, buf, pos);
557: }
558: }
559:
560: /*
561: * Look up the command at position "pos" in the buffer, returning it (or
562: * TEXICMD__MAX if none found) and setting "end" to be the absolute
563: * index after the command name.
564: */
565: enum texicmd
1.7 kristaps 566: texicmd(struct texi *p, const char *buf, size_t pos,
567: size_t sz, size_t *end, struct teximacro **macro)
1.1 kristaps 568: {
1.4 kristaps 569: size_t i, len, toksz;
1.1 kristaps 570:
571: assert('@' == buf[pos]);
572:
1.7 kristaps 573: if (NULL != macro)
574: *macro = NULL;
575:
1.1 kristaps 576: if ((*end = pos) == sz)
577: return(TEXICMD__MAX);
578: else if ((*end = ++pos) == sz)
579: return(TEXICMD__MAX);
580:
581: /* Alphabetic commands are special. */
582: if ( ! isalpha(buf[pos])) {
583: if ((*end = pos + 1) == sz)
584: return(TEXICMD__MAX);
585: for (i = 0; i < TEXICMD__MAX; i++) {
586: if (1 != texitoks[i].len)
587: continue;
588: if (0 == strncmp(texitoks[i].tok, &buf[pos], 1))
589: return(i);
590: }
591: texiwarn(p, "bad command: @%c", buf[pos]);
592: return(TEXICMD__MAX);
593: }
594:
1.4 kristaps 595: /* Scan to the end of the possible command name. */
1.1 kristaps 596: for (*end = pos; *end < sz && ! ismspace(buf[*end]); (*end)++)
597: if ((*end > pos && ('@' == buf[*end] ||
598: '{' == buf[*end] || '}' == buf[*end])))
599: break;
600:
1.4 kristaps 601: /* Look for the command. */
1.1 kristaps 602: len = *end - pos;
603: for (i = 0; i < TEXICMD__MAX; i++) {
604: if (len != texitoks[i].len)
605: continue;
606: if (0 == strncmp(texitoks[i].tok, &buf[pos], len))
607: return(i);
608: }
609:
1.4 kristaps 610: /* Look for it in our indices. */
611: for (i = 0; i < p->indexsz; i++) {
612: toksz = strlen(p->indexs[i]);
613: if (len != 5 + toksz)
614: continue;
615: if (strncmp(&buf[pos], p->indexs[i], toksz))
616: continue;
617: if (0 == strncmp(&buf[pos + toksz], "index", 5))
1.7 kristaps 618: return(TEXICMD_USER_INDEX);
619: }
620:
621: for (i = 0; i < p->macrosz; i++) {
622: if (len != strlen(p->macros[i].key))
623: continue;
624: if (strncmp(&buf[pos], p->macros[i].key, len))
625: continue;
626: if (NULL != macro)
627: *macro = &p->macros[i];
628: return(TEXICMD__MAX);
1.4 kristaps 629: }
630:
1.1 kristaps 631: texiwarn(p, "bad command: @%.*s", (int)len, &buf[pos]);
632: return(TEXICMD__MAX);
633: }
634:
635: /*
636: * Parse an argument from a bracketed command, e.g., @url{foo, baz}.
637: * Num should be set to the argument we're currently parsing, although
638: * it suffixes for it to be zero or non-zero.
639: * This will return 1 if there are more arguments, 0 otherwise.
640: * This will stop (returning 0) in the event of EOF or if we're not at a
641: * bracket for the zeroth parse.
642: */
643: int
644: parsearg(struct texi *p, const char *buf,
645: size_t sz, size_t *pos, size_t num)
646: {
1.7 kristaps 647: size_t end;
648: enum texicmd cmd;
649: struct teximacro *macro;
1.1 kristaps 650:
651: while (*pos < sz && ismspace(buf[*pos]))
652: advance(p, buf, pos);
653: if (*pos == sz || (0 == num && '{' != buf[*pos]))
654: return(0);
655: if (0 == num)
656: advance(p, buf, pos);
657:
658: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
659: switch (buf[*pos]) {
660: case (','):
661: advance(p, buf, pos);
662: return(1);
663: case ('}'):
664: advance(p, buf, pos);
665: return(0);
666: case ('{'):
667: if (0 == p->ign)
668: texiwarn(p, "unexpected \"{\"");
669: advance(p, buf, pos);
670: continue;
671: case ('@'):
672: break;
673: default:
674: texiword(p, buf, sz, pos, ',');
675: continue;
676: }
677:
1.7 kristaps 678: cmd = texicmd(p, buf, *pos, sz, &end, ¯o);
1.1 kristaps 679: advanceto(p, buf, pos, end);
1.7 kristaps 680: if (NULL != macro)
681: texiexecmacro(p, macro, buf, sz, pos);
1.1 kristaps 682: if (TEXICMD__MAX == cmd)
683: continue;
684: if (NULL != texitoks[cmd].fp)
685: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
686: }
687: return(0);
688: }
689:
690: /*
691: * Parse until the end of a bracketed statement, e.g., @foo{bar baz}.
692: * This will stop in the event of EOF or if we're not at a bracket.
693: */
694: void
695: parsebracket(struct texi *p, const char *buf, size_t sz, size_t *pos)
696: {
1.7 kristaps 697: size_t end;
698: enum texicmd cmd;
699: struct teximacro *macro;
1.1 kristaps 700:
701: while (*pos < sz && ismspace(buf[*pos]))
702: advance(p, buf, pos);
703:
704: if (*pos == sz || '{' != buf[*pos])
705: return;
706: advance(p, buf, pos);
707:
708: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
709: switch (buf[*pos]) {
710: case ('}'):
711: advance(p, buf, pos);
712: return;
713: case ('{'):
714: if (0 == p->ign)
715: texiwarn(p, "unexpected \"{\"");
716: advance(p, buf, pos);
717: continue;
718: case ('@'):
719: break;
720: default:
721: texiword(p, buf, sz, pos, '\0');
722: continue;
723: }
724:
1.7 kristaps 725: cmd = texicmd(p, buf, *pos, sz, &end, ¯o);
1.1 kristaps 726: advanceto(p, buf, pos, end);
1.7 kristaps 727: if (NULL != macro)
728: texiexecmacro(p, macro, buf, sz, pos);
1.1 kristaps 729: if (TEXICMD__MAX == cmd)
730: continue;
731: if (NULL != texitoks[cmd].fp)
732: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
733: }
734: }
735:
736: /*
737: * This should be invoked when we're on a macro line and want to process
738: * to the end of the current input line, doing all of our macros along
739: * the way.
740: */
741: void
742: parseeoln(struct texi *p, const char *buf, size_t sz, size_t *pos)
743: {
1.7 kristaps 744: size_t end;
745: enum texicmd cmd;
746: struct teximacro *macro;
1.1 kristaps 747:
748: while (*pos < sz && '\n' != buf[*pos]) {
749: while (*pos < sz && isws(buf[*pos])) {
750: p->seenws = 1;
751: if (p->literal)
752: texiputchar(p, buf[*pos]);
753: advance(p, buf, pos);
754: }
755: switch (buf[*pos]) {
756: case ('}'):
757: if (0 == p->ign)
758: texiwarn(p, "unexpected \"}\"");
759: advance(p, buf, pos);
760: continue;
761: case ('{'):
762: if (0 == p->ign)
763: texiwarn(p, "unexpected \"{\"");
764: advance(p, buf, pos);
765: continue;
766: case ('@'):
767: break;
768: default:
769: texiword(p, buf, sz, pos, '\0');
770: continue;
771: }
772:
1.7 kristaps 773: cmd = texicmd(p, buf, *pos, sz, &end, ¯o);
1.1 kristaps 774: advanceto(p, buf, pos, end);
1.7 kristaps 775: if (NULL != macro)
776: texiexecmacro(p, macro, buf, sz, pos);
1.1 kristaps 777: if (TEXICMD__MAX == cmd)
778: continue;
779: if (NULL != texitoks[cmd].fp)
780: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
781: }
782: }
783:
784: /*
785: * Parse a single word or command.
786: * This will return immediately at the EOF.
787: */
788: void
789: parsesingle(struct texi *p, const char *buf, size_t sz, size_t *pos)
790: {
1.7 kristaps 791: size_t end;
792: enum texicmd cmd;
793: struct teximacro *macro;
1.1 kristaps 794:
795: if ((*pos = advancenext(p, buf, sz, pos)) >= sz)
796: return;
797:
798: switch (buf[*pos]) {
799: case ('}'):
800: if (0 == p->ign)
801: texiwarn(p, "unexpected \"}\"");
802: advance(p, buf, pos);
803: return;
804: case ('{'):
805: if (0 == p->ign)
806: texiwarn(p, "unexpected \"{\"");
807: advance(p, buf, pos);
808: return;
809: case ('@'):
810: break;
811: default:
812: texiword(p, buf, sz, pos, '\0');
813: return;
814: }
815:
1.7 kristaps 816: cmd = texicmd(p, buf, *pos, sz, &end, ¯o);
1.1 kristaps 817: advanceto(p, buf, pos, end);
1.7 kristaps 818: if (NULL != macro)
819: texiexecmacro(p, macro, buf, sz, pos);
1.1 kristaps 820: if (TEXICMD__MAX == cmd)
821: return;
822: if (NULL != texitoks[cmd].fp)
823: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
824: }
825:
826: /*
827: * This is used in the @deffn type of command.
828: * These have an arbitrary number of line arguments; however, these
829: * arguments may or may not be surrounded by brackets.
830: * In this function, we parse each one as either a bracketed or
831: * non-bracketed argument, returning 0 when we've reached the end of
832: * line or 1 otherwise.
833: */
834: int
835: parselinearg(struct texi *p, const char *buf, size_t sz, size_t *pos)
836: {
837:
838: while (*pos < sz && isws(buf[*pos])) {
839: p->seenws = 1;
840: advance(p, buf, pos);
841: }
842:
843: if (*pos < sz && '{' == buf[*pos])
844: parsebracket(p, buf, sz, pos);
1.3 kristaps 845: else if (*pos < sz && '\n' != buf[*pos])
1.1 kristaps 846: parsesingle(p, buf, sz, pos);
847: else
848: return(0);
849:
850: return(1);
851: }
852:
/*
 * Parse the whole buffer, one word or command at a time, from its
 * first byte until the end.
 */
void
parseeof(struct texi *p, const char *buf, size_t sz)
{
	size_t	 cur;

	cur = 0;
	while (cur < sz)
		parsesingle(p, buf, sz, &cur);
}
864:
865: /*
1.8 kristaps 866: * This is like parseeof() except that it's to be invoked on memory
867: * buffers while parsing a larger scope.
868: * This is useful for parsing macro sequences.
869: * The line, column, and name of the calling file context are saved, the
870: * column and line reset, then all of these restored after parse.
871: */
872: void
873: parsemembuf(struct texi *p, const char *buf, size_t sz)
874: {
875: size_t svln, svcol;
876: const char *svname;
877:
878: svln = p->files[p->filepos - 1].line;
879: svcol = p->files[p->filepos - 1].col;
880: svname = p->files[p->filepos - 1].name;
881:
882: p->files[p->filepos - 1].line = 0;
883: p->files[p->filepos - 1].col = 0;
884: p->files[p->filepos - 1].name = "<macro buffer>";
885:
886: parseeof(p, buf, sz);
887:
888: p->files[p->filepos - 1].line = svln;
889: p->files[p->filepos - 1].col = svcol;
890: p->files[p->filepos - 1].name = svname;
891: }
892:
893: /*
1.1 kristaps 894: * Parse a block sequence until we have the "@end endtoken" command
895: * invocation.
896: * This will return immediately at EOF.
897: */
898: void
899: parseto(struct texi *p, const char *buf,
900: size_t sz, size_t *pos, const char *endtoken)
901: {
1.7 kristaps 902: size_t end;
903: enum texicmd cmd;
904: size_t endtoksz;
905: struct teximacro *macro;
1.1 kristaps 906:
907: endtoksz = strlen(endtoken);
908: assert(endtoksz > 0);
909:
910: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
911: switch (buf[*pos]) {
912: case ('}'):
913: if (0 == p->ign)
914: texiwarn(p, "unexpected \"}\"");
915: advance(p, buf, pos);
916: continue;
917: case ('{'):
918: if (0 == p->ign)
919: texiwarn(p, "unexpected \"{\"");
920: advance(p, buf, pos);
921: continue;
922: case ('@'):
923: break;
924: default:
925: texiword(p, buf, sz, pos, '\0');
926: continue;
927: }
928:
1.7 kristaps 929: cmd = texicmd(p, buf, *pos, sz, &end, ¯o);
1.1 kristaps 930: advanceto(p, buf, pos, end);
931: if (TEXICMD_END == cmd) {
932: while (*pos < sz && isws(buf[*pos]))
933: advance(p, buf, pos);
934: /*
935: * FIXME: check the full word, not just its
936: * initial substring!
937: */
938: if (sz - *pos >= endtoksz && 0 == strncmp
939: (&buf[*pos], endtoken, endtoksz)) {
940: advanceeoln(p, buf, sz, pos, 0);
941: break;
942: }
943: if (0 == p->ign)
944: texiwarn(p, "unexpected \"end\"");
945: advanceeoln(p, buf, sz, pos, 0);
946: continue;
1.7 kristaps 947: }
948: if (NULL != macro)
949: texiexecmacro(p, macro, buf, sz, pos);
950: if (TEXICMD__MAX == cmd)
951: continue;
952: if (NULL != texitoks[cmd].fp)
953: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
1.1 kristaps 954: }
955: }
956:
957: /*
958: * Memory-map the file "fname" and begin parsing it unless "parse" is
959: * zero, in which case we just dump the file to stdout (making sure it
960: * doesn't trip up mdoc(7) along the way).
961: * This can be called in a nested context.
962: */
963: void
964: parsefile(struct texi *p, const char *fname, int parse)
965: {
966: struct texifile *f;
967: int fd;
968: struct stat st;
969: size_t i;
970:
1.5 kristaps 971: if (64 == p->filepos)
1.6 kristaps 972: texierr(p, "too many open files");
1.1 kristaps 973: f = &p->files[p->filepos];
974: memset(f, 0, sizeof(struct texifile));
975:
976: f->name = fname;
977: if (-1 == (fd = open(fname, O_RDONLY, 0))) {
978: texiabort(p, fname);
979: } else if (-1 == fstat(fd, &st)) {
980: close(fd);
981: texiabort(p, fname);
982: }
983:
984: f->mapsz = st.st_size;
985: f->map = mmap(NULL, f->mapsz,
986: PROT_READ, MAP_SHARED, fd, 0);
987: close(fd);
988:
989: if (MAP_FAILED == f->map)
990: texiabort(p, fname);
991:
992: p->filepos++;
993: if ( ! parse) {
994: /*
995: * We're printing verbatim output.
996: * Make sure it doesn't get interpreted as mdoc by
997: * escaping escapes and making sure leading dots don't
998: * trigger mdoc(7) expansion.
999: */
1000: for (i = 0; i < f->mapsz; i++) {
1001: if (i > 0 && '.' == f->map[i])
1002: if ('\n' == f->map[i - 1])
1003: fputs("\\&", stdout);
1004: putchar(f->map[i]);
1005: if ('\\' == f->map[i])
1006: putchar('e');
1007: }
1008: } else
1009: parseeof(p, f->map, f->mapsz);
1010: texifilepop(p);
1011: }
1012:
1.2 kristaps 1013: /*
1014: * Look up the value to a stored pair's value starting in "buf" from
1015: * start to end.
1016: * Return the pointer to the value memory, which can be NULL if the
1017: * pointer key does not exist.
1018: * The pointer can point to NULL if the value has been unset.
1019: */
1020: static char **
1021: valuequery(const struct texi *p,
1022: const char *buf, size_t start, size_t end)
1023: {
1024: size_t i, sz, len;
1025:
1026: assert(end >= start);
1027: /* Ignore zero-length. */
1028: if (0 == (len = (end - start)))
1029: return(NULL);
1030: for (i = 0; i < p->valsz; i++) {
1031: sz = strlen(p->vals[i].key);
1032: if (sz != len)
1033: continue;
1034: if (0 == strncmp(p->vals[i].key, &buf[start], len))
1035: return(&p->vals[i].value);
1036: }
1037: return(NULL);
1038: }
1039:
/*
 * Parse a key running until the end of line, e.g., @clear foo\n, and
 * look it up with valuequery().
 * Leading blanks are skipped; the line and its newline are consumed.
 * Returns the pointer to the value slot, or NULL at EOF or if the key
 * is not stored.
 */
static char **
valuelquery(struct texi *p, const char *buf, size_t sz, size_t *pos)
{
	size_t	 start, end;

	while (*pos < sz && isws(buf[*pos]))
		advance(p, buf, pos);
	if (*pos == sz)
		return(NULL);

	/* The key is everything up to the newline or EOF. */
	start = *pos;
	for (end = start; end < sz && '\n' != buf[end]; end++)
		continue;

	advanceto(p, buf, pos, end);
	if (*pos < sz) {
		assert('\n' == buf[*pos]);
		advance(p, buf, pos);
	}

	return(valuequery(p, buf, start, end));
}
1066:
/*
 * Unset the value whose key runs until the end of the current line:
 * the stored value is freed and its slot set to NULL.
 * The key itself stays in the table.
 * Does nothing if the key is not stored.
 */
void
valuelclear(struct texi *p, const char *buf, size_t sz, size_t *pos)
{
	char	**slot;

	slot = valuelquery(p, buf, sz, pos);
	if (NULL != slot) {
		free(*slot);
		*slot = NULL;
	}
}
1077:
/*
 * Look up the value whose key runs until the end of the current line.
 * Returns NULL if the key is not stored or its value has been unset.
 */
const char *
valuellookup(struct texi *p, const char *buf, size_t sz, size_t *pos)
{
	char	**slot;

	slot = valuelquery(p, buf, sz, pos);
	return(NULL == slot ? NULL : *slot);
}
1087:
/*
 * Parse a key from a bracketed string, e.g., @value{foo}, and return
 * its value.
 * Leading blanks are skipped and the brackets and key are consumed.
 * Returns NULL if there are no brackets, the key is empty, the key is
 * not stored, or its value has been unset.
 */
const char *
valueblookup(struct texi *p, const char *buf, size_t sz, size_t *pos)
{
	size_t	  start, end;
	char	**slot;

	while (*pos < sz && isws(buf[*pos]))
		advance(p, buf, pos);
	if (*pos == sz || '{' != buf[*pos])
		return(NULL);
	advance(p, buf, pos);

	/* The key is everything up to the closing bracket or EOF. */
	start = *pos;
	for (end = start; end < sz && '}' != buf[end]; end++)
		continue;

	advanceto(p, buf, pos, end);
	if (*pos < sz) {
		assert('}' == buf[*pos]);
		advance(p, buf, pos);
	}

	slot = valuequery(p, buf, start, end);
	return(NULL == slot ? NULL : *slot);
}
1118:
1119: void
1120: valueadd(struct texi *p, char *key, char *val)
1121: {
1122: size_t i;
1123:
1124: assert(NULL != key);
1125: assert(NULL != val);
1126:
1127: for (i = 0; i < p->valsz; i++)
1128: if (0 == strcmp(p->vals[i].key, key))
1129: break;
1130:
1131: if (i < p->valsz) {
1132: free(key);
1133: free(p->vals[i].value);
1134: p->vals[i].value = val;
1135: } else {
1.4 kristaps 1136: /* FIXME: reallocarray() */
1.2 kristaps 1137: p->vals = realloc(p->vals,
1138: (p->valsz + 1) *
1139: sizeof(struct texivalue));
1.4 kristaps 1140: if (NULL == p->vals)
1141: texiabort(p, NULL);
1.2 kristaps 1142: p->vals[p->valsz].key = key;
1143: p->vals[p->valsz].value = val;
1144: p->valsz++;
1145: }
1.7 kristaps 1146: }
1147:
/*
 * Take the arguments to a macro, e.g., @foo{bar, baz, xyzzy} (or the
 * declaration form, @macro foo {arg1, ...}) and textually convert them
 * into an array of strings, storing its length in "argsz".
 * The strings need to be freed individually and the array as a whole.
 * "hint" is the number of arguments expected: if it is non-zero and
 * there is no opening bracket, the rest of the line (consumed along
 * with its newline) is the single argument; if it is exactly one,
 * commas do not separate arguments.
 * With no bracket and a zero hint, NULL is returned and "argsz" is
 * zero.
 * Running out of input inside the brackets is a fatal error.
 * NOTE: this will puke on @, or @} macros, which can trick it into
 * stopping argument parsing earlier.
 * Ergo, textual: this doesn't interpret the arguments in any way.
 */
char **
argparse(struct texi *p, const char *buf,
	size_t sz, size_t *pos, size_t *argsz, size_t hint)
{
	char	**args;
	size_t	  start, end, stack;
	int	  bracket;

	while (*pos < sz && isws(buf[*pos]))
		advance(p, buf, pos);

	args = NULL;
	*argsz = 0;

	/* Don't look at buf[*pos] if we've run out of input. */
	bracket = *pos < sz && '{' == buf[*pos];

	if ( ! bracket && hint) {
		/*
		 * Special case: if we encounter an unbracketed argument
		 * and we're being invoked with non-zero arguments
		 * (versus being set, i.e., hint>0), then parse until
		 * the end of line.
		 */
		*argsz = 1;
		args = calloc(1, sizeof(char *));
		if (NULL == args)
			texiabort(p, NULL);
		start = *pos;
		while (*pos < sz) {
			if ('\n' == buf[*pos])
				break;
			advance(p, buf, pos);
		}
		args[0] = malloc(*pos - start + 1);
		if (NULL == args[0])
			texiabort(p, NULL);
		memcpy(args[0], &buf[start], *pos - start);
		args[0][*pos - start] = '\0';
		if (*pos < sz && '\n' == buf[*pos])
			advance(p, buf, pos);
		return(args);
	} else if ( ! bracket)
		return(args);

	/* Parse til the closing '}', putting into the array. */
	advance(p, buf, pos);
	while (*pos < sz) {
		while (*pos < sz && isws(buf[*pos]))
			advance(p, buf, pos);
		start = *pos;
		stack = 0;
		while (*pos < sz) {
			/*
			 * According to the manual, commas within
			 * embedded commands are escaped.
			 * We keep track of embedded-ness in the "stack"
			 * state anyway, so this is free.
			 */
			if (',' == buf[*pos] && 0 == stack && 1 != hint)
				break;
			else if (0 == stack && '}' == buf[*pos])
				break;
			else if (0 != stack && '}' == buf[*pos])
				stack--;
			else if ('{' == buf[*pos])
				stack++;
			advance(p, buf, pos);
		}
		if (stack)
			texiwarn(p, "unterminated macro "
				"in macro arguments");
		if ((end = *pos) == sz)
			break;
		/* Test for zero-length '{ }'. */
		if (start == end && '}' == buf[*pos] && 0 == *argsz)
			break;
		/* FIXME: use reallocarray. */
		args = realloc
			(args, sizeof(char *) *
			 (*argsz + 1));
		if (NULL == args)
			texiabort(p, NULL);
		args[*argsz] = malloc(end - start + 1);
		if (NULL == args[*argsz])
			texiabort(p, NULL);
		memcpy(args[*argsz],
			&buf[start], end - start);
		args[*argsz][end - start] = '\0';
		(*argsz)++;
		if ('}' == buf[*pos])
			break;
		/* Step over the separating comma. */
		advance(p, buf, pos);
	}

	if (*pos == sz)
		texierr(p, "unterminated arguments");
	assert('}' == buf[*pos]);
	advance(p, buf, pos);
	return(args);
}
CVSweb