Annotation of texi2mdoc/util.c, Revision 1.10
1.10 ! kristaps 1: /* $Id: util.c,v 1.9 2015/02/23 14:36:03 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2015 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/mman.h>
18: #include <sys/stat.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <libgen.h>
25: #include <limits.h>
26: #include <stdarg.h>
27: #include <stdio.h>
28: #include <stdlib.h>
29: #include <string.h>
30: #include <time.h>
31: #include <unistd.h>
32:
33: #include "extern.h"
34:
35: /*
36: * Unmap the top-most file in the stack of files currently opened (that
37: * is, nested calls to parsefile()).
38: */
39: void
40: texifilepop(struct texi *p)
41: {
42: struct texifile *f;
43:
44: assert(p->filepos > 0);
45: f = &p->files[--p->filepos];
46: munmap(f->map, f->mapsz);
47: }
48:
1.7 kristaps 49: static void
50: teximacrofree(struct teximacro *p)
51: {
52: size_t i;
53:
54: for (i = 0; i < p->argsz; i++)
55: free(p->args[i]);
56:
57: free(p->args);
58: free(p->key);
59: free(p->value);
60: }
61:
62: static void
63: texivaluefree(struct texivalue *p)
64: {
65:
66: free(p->key);
67: free(p->value);
68: }
69:
1.1 kristaps 70: /*
71: * Unmap all files that we're currently using and free all resources
72: * that we've allocated during the parse.
73: * The utility should exit(...) after this is called.
74: */
75: void
76: texiexit(struct texi *p)
77: {
78: size_t i;
79:
80: /* Make sure we're newline-terminated. */
81: if (p->outcol)
82: putchar('\n');
83:
84: /* Unmap all files. */
85: while (p->filepos > 0)
86: texifilepop(p);
87:
1.7 kristaps 88: for (i = 0; i < p->macrosz; i++)
89: teximacrofree(&p->macros[i]);
1.1 kristaps 90: for (i = 0; i < p->dirsz; i++)
91: free(p->dirs[i]);
1.4 kristaps 92: for (i = 0; i < p->indexsz; i++)
93: free(p->indexs[i]);
1.7 kristaps 94: for (i = 0; i < p->valsz; i++)
95: texivaluefree(&p->vals[i]);
1.4 kristaps 96:
1.7 kristaps 97: free(p->macros);
1.1 kristaps 98: free(p->vals);
1.4 kristaps 99: free(p->indexs);
1.1 kristaps 100: free(p->dirs);
101: free(p->subtitle);
102: free(p->title);
103: }
104:
/*
 * Die on a system-level failure.
 * "errstring" is handed to perror(3) (so it may be NULL), after which
 * all parse resources are released with texiexit() and the process
 * exits with EXIT_FAILURE.
 * This function does not return.
 */
void
texiabort(struct texi *p, const char *errstring)
{
	/* Report first: the cleanup below may disturb errno. */
	perror(errstring);
	texiexit(p);
	exit(EXIT_FAILURE);
}
117:
118: /*
119: * Print a generic warning message (to stderr) tied to our current
120: * location in the parse sequence.
121: */
122: void
123: texiwarn(const struct texi *p, const char *fmt, ...)
124: {
125: va_list ap;
126:
127: fprintf(stderr, "%s:%zu:%zu: warning: ",
128: p->files[p->filepos - 1].name,
129: p->files[p->filepos - 1].line + 1,
130: p->files[p->filepos - 1].col + 1);
131: va_start(ap, fmt);
132: vfprintf(stderr, fmt, ap);
133: va_end(ap);
134: fputc('\n', stderr);
135: }
136:
137: /*
138: * Print an error message (to stderr) tied to our current location in
139: * the parse sequence, invoke texiexit(), then die.
140: */
141: void
142: texierr(struct texi *p, const char *fmt, ...)
143: {
144: va_list ap;
145:
146: fprintf(stderr, "%s:%zu:%zu: error: ",
147: p->files[p->filepos - 1].name,
148: p->files[p->filepos - 1].line + 1,
149: p->files[p->filepos - 1].col + 1);
150: va_start(ap, fmt);
151: vfprintf(stderr, fmt, ap);
152: va_end(ap);
153: fputc('\n', stderr);
154: texiexit(p);
155: exit(EXIT_FAILURE);
156: }
157:
158: /*
159: * Put a single data character to the output if we're not ignoring.
160: * Makes sure we don't spurriously start a macro.
161: * Adjusts our output status.
162: * This shouldn't be called for macros: just for ordinary text.
163: */
164: void
165: texiputchar(struct texi *p, char c)
166: {
167:
168: if (p->ign)
169: return;
170:
171: if ('.' == c && 0 == p->outcol)
172: fputs("\\&", stdout);
1.10 ! kristaps 173: if ('\'' == c && 0 == p->outcol)
! 174: fputs("\\&", stdout);
1.1 kristaps 175:
176: putchar(c);
177: p->seenvs = 0;
178: if ('\n' == c) {
179: p->outcol = 0;
180: p->seenws = 0;
181: } else
182: p->outcol++;
183: }
184:
/*
 * Write a NUL-terminated string of ordinary text, one character at a
 * time, through texiputchar().
 * This shouldn't be called for macros: just for ordinary text.
 */
void
texiputchars(struct texi *p, const char *s)
{
	const char	*cp;

	for (cp = s; '\0' != *cp; cp++)
		texiputchar(p, *cp);
}
196:
/*
 * Write the bytes buf[start] through buf[end - 1] via texiputchar(),
 * following every backslash with an 'e' so that it comes out as the
 * "\e" escape instead of starting an escape sequence in the output.
 */
void
texiputbuf(struct texi *p, const char *buf, size_t start, size_t end)
{
	size_t	 i;

	for (i = start; i < end; i++) {
		texiputchar(p, buf[i]);
		if ('\\' == buf[i])
			texiputchar(p, 'e');
	}
}
211:
212: /*
213: * Close an mdoc(7) macro opened with teximacroopen().
214: * If there are no more macros on the line, prints a newline.
215: */
216: void
217: teximacroclose(struct texi *p)
218: {
219:
220: if (p->ign)
221: return;
222:
223: if (0 == --p->outmacro) {
224: putchar('\n');
225: p->outcol = p->seenws = 0;
226: }
227: }
228:
229: /*
230: * Open a mdoc(7) macro.
231: * This is used for line macros, e.g., Qq [foo bar baz].
232: * It can be invoked for nested macros, e.g., Qq Li foo .
233: * TODO: flush-right punctuation (e.g., parenthesis).
234: */
235: void
236: teximacroopen(struct texi *p, const char *s)
237: {
238: int rc;
239:
240: if (p->ign)
241: return;
242:
243: if (p->outcol && 0 == p->outmacro) {
244: putchar('\n');
245: p->outcol = 0;
246: }
247:
248: if (0 == p->outmacro)
249: putchar('.');
250: else
251: putchar(' ');
252:
253: if (EOF != (rc = fputs(s, stdout)))
254: p->outcol += rc;
255:
256: putchar(' ');
257: p->outcol++;
258: p->outmacro++;
259: p->seenws = 0;
260: }
261:
262: /*
263: * Put a stadnalone mdoc(7) command with the trailing newline.
264: */
265: void
266: teximacro(struct texi *p, const char *s)
267: {
268:
269: if (p->ign)
270: return;
271:
272: if (p->outmacro)
273: texierr(p, "\"%s\" in open line scope!?", s);
274: if (p->literal)
275: texierr(p, "\"%s\" in a literal scope!?", s);
276:
277: if (p->outcol)
278: putchar('\n');
279:
280: putchar('.');
281: puts(s);
282: p->outcol = p->seenws = 0;
283: }
284:
285: /*
286: * Introduce vertical space during normal (non-macro) input.
287: */
288: void
289: texivspace(struct texi *p)
290: {
291:
1.5 kristaps 292: if (p->seenvs || TEXILIST_TABLE == p->list)
1.1 kristaps 293: return;
294: teximacro(p, "Pp");
295: p->seenvs = 1;
296: }
297:
298: /*
299: * Advance by a single byte in the input stream, adjusting our location
300: * in the current input file.
301: */
302: void
303: advance(struct texi *p, const char *buf, size_t *pos)
304: {
305:
306: if ('\n' == buf[*pos]) {
307: p->files[p->filepos - 1].line++;
308: p->files[p->filepos - 1].col = 0;
309: } else
310: p->files[p->filepos - 1].col++;
311:
312: (*pos)++;
313: }
314:
315: /*
316: * It's common to wait punctuation to float on the right side of macro
317: * lines in mdoc(7), e.g., ".Em hello ) ."
318: * This function does so, and should be called before teximacroclose().
319: * It will detect that it's the last in the nested macros and
320: * appropriately flush-left punctuation alongside the macro.
321: */
322: void
323: texipunctuate(struct texi *p, const char *buf, size_t sz, size_t *pos)
324: {
325: size_t start, end;
326:
327: if (1 != p->outmacro)
328: return;
329:
330: for (start = end = *pos; end < sz; end++) {
331: switch (buf[end]) {
332: case (','):
333: case (')'):
334: case ('.'):
335: case ('"'):
336: case (':'):
337: case ('!'):
338: case ('?'):
339: continue;
340: default:
341: break;
342: }
343: break;
344: }
345: if (end == *pos)
346: return;
347: if (end + 1 == sz || ' ' == buf[end] || '\n' == buf[end]) {
348: for ( ; start < end; start++) {
349: texiputchar(p, ' ');
350: texiputchar(p, buf[start]);
351: advance(p, buf, pos);
352: }
353: }
354: }
355:
356: /*
357: * Advance to the next non-whitespace word in the input stream.
358: * If we're in literal mode, then print all of the whitespace as we're
359: * doing so.
360: */
361: static size_t
362: advancenext(struct texi *p, const char *buf, size_t sz, size_t *pos)
363: {
364:
365: if (p->literal) {
366: while (*pos < sz && ismspace(buf[*pos])) {
367: if (*pos && '\n' == buf[*pos] &&
368: '\\' == buf[*pos - 1])
369: texiputchar(p, 'e');
370: texiputchar(p, buf[*pos]);
371: advance(p, buf, pos);
372: }
373: return(*pos);
374: }
375:
376: while (*pos < sz && ismspace(buf[*pos])) {
377: p->seenws = 1;
378: /*
379: * If it looks like we've printed a double-line, then
380: * output a paragraph.
381: * FIXME: this is stupid.
382: */
383: if (*pos && '\n' == buf[*pos] && '\n' == buf[*pos - 1])
384: texivspace(p);
385: advance(p, buf, pos);
386: }
387: return(*pos);
388: }
389:
/*
 * Advance *pos to the next newline in the input (or to the end of the
 * buffer), stepping over that newline as well if "consumenl" is set,
 * and return the new position.
 * NOTE: THIS SHOULD NOT BE CALLED ON BLANK TEXT, as it will read up to
 * the @\n.
 */
size_t
advanceeoln(struct texi *p, const char *buf,
	size_t sz, size_t *pos, int consumenl)
{
	for ( ; *pos < sz; advance(p, buf, pos))
		if ('\n' == buf[*pos])
			break;

	/* Here we are either at the newline or at the end of input. */
	if (consumenl && *pos < sz)
		advance(p, buf, pos);

	return(*pos);
}
406:
/*
 * Advance byte by byte (so that line and column tracking stay correct)
 * until *pos equals "end", an absolute position in the current buffer
 * that must not lie before the current position.
 */
void
advanceto(struct texi *p, const char *buf, size_t *pos, size_t end)
{
	assert(*pos <= end);

	for ( ; *pos < end; )
		advance(p, buf, pos);
}
419:
1.7 kristaps 420: static void
421: texiexecmacro(struct texi *p, struct teximacro *m,
422: const char *buf, size_t sz, size_t *pos)
423: {
424: size_t valsz, realsz, aasz, asz,
425: ssz, i, j, k, start, end;
426: char *val;
427: char **args;
428:
1.8 kristaps 429: args = argparse(p, buf, sz, pos, &asz, m->argsz);
1.7 kristaps 430: if (asz != m->argsz)
431: texiwarn(p, "invalid macro argument length");
432: aasz = asz < m->argsz ? asz : m->argsz;
433:
434: if (0 == aasz) {
1.8 kristaps 435: parsemembuf(p, m->value, strlen(m->value));
1.7 kristaps 436: return;
437: }
438:
439: valsz = realsz = strlen(m->value);
440: val = strdup(m->value);
441:
442: for (i = j = 0; i < realsz; i++) {
443: /* Parse blindly til the backslash delimiter. */
444: if ('\\' != m->value[i]) {
445: val[j++] = m->value[i];
446: val[j] = '\0';
447: continue;
448: } else if (i == realsz - 1)
449: texierr(p, "trailing argument name delimiter");
450:
451: /* Double-backslash is escaped. */
452: if ('\\' == m->value[i + 1]) {
453: val[j++] = m->value[i++];
454: val[j] = '\0';
455: continue;
456: }
457:
458: assert('\\' == m->value[i] && i < realsz - 1);
459:
460: /* Parse to terminating delimiter. */
461: /* FIXME: embedded, escaped delimiters? */
462: for (start = end = i + 1; end < realsz; end++)
463: if ('\\' == m->value[end])
464: break;
465: if (end == realsz)
466: texierr(p, "unterminated argument name");
467:
468: for (k = 0; k < aasz; k++) {
469: if ((ssz = strlen(m->args[k])) != (end - start))
470: continue;
471: if (strncmp(&m->value[start], m->args[k], ssz))
472: continue;
473: break;
474: }
475:
476: /*
477: * Argument didn't exist in argument table.
478: * No need to reallocate here: we just copy the text
479: * directly from the macro value into the buffer.
480: */
481: if (k == aasz) {
482: for ( ; i < end; i++)
483: val[j++] = m->value[i];
484: assert('\\' == m->value[i]);
485: val[j++] = m->value[i];
486: val[j] = '\0';
487: continue;
488: }
489:
490: if (strlen(args[k]) > ssz) {
491: valsz += strlen(args[k]);
492: val = realloc(val, valsz + 1);
493: if (NULL == val)
494: texiabort(p, NULL);
495: }
496:
497: j = strlcat(val, args[k], valsz + 1);
498: i = end;
499: }
500:
1.8 kristaps 501: parsemembuf(p, val, strlen(val));
1.7 kristaps 502:
503: for (i = 0; i < asz; i++)
504: free(args[i]);
505: free(args);
506: free(val);
507: }
508:
1.1 kristaps 509: /*
510: * Output a free-form word in the input stream, progressing to the next
511: * command or white-space.
512: * This also will advance the input stream.
513: */
514: static void
515: texiword(struct texi *p, const char *buf,
516: size_t sz, size_t *pos, char extra)
517: {
518:
519: if (p->seenws && 0 == p->outmacro &&
520: p->outcol > 72 && 0 == p->literal)
521: texiputchar(p, '\n');
522: /* FIXME: abstract this: we use it elsewhere. */
523: if (p->seenws && p->outcol && 0 == p->literal)
524: texiputchar(p, ' ');
525:
526: p->seenws = 0;
527:
528: while (*pos < sz && ! ismspace(buf[*pos])) {
529: switch (buf[*pos]) {
530: case ('@'):
531: case ('}'):
532: case ('{'):
533: return;
534: }
535: if ('\0' != extra && buf[*pos] == extra)
536: return;
537: if (*pos < sz - 1 &&
538: '`' == buf[*pos] &&
539: '`' == buf[*pos + 1]) {
540: texiputchars(p, "\\(lq");
541: advance(p, buf, pos);
542: } else if (*pos < sz - 1 &&
543: '\'' == buf[*pos] &&
544: '\'' == buf[*pos + 1]) {
545: texiputchars(p, "\\(rq");
546: advance(p, buf, pos);
1.10 ! kristaps 547: } else if ('\\' == buf[*pos]) {
! 548: texiputchar(p, buf[*pos]);
! 549: texiputchar(p, 'e');
1.1 kristaps 550: } else
551: texiputchar(p, buf[*pos]);
552: advance(p, buf, pos);
553: }
554: }
555:
556: /*
557: * Look up the command at position "pos" in the buffer, returning it (or
558: * TEXICMD__MAX if none found) and setting "end" to be the absolute
559: * index after the command name.
560: */
561: enum texicmd
1.7 kristaps 562: texicmd(struct texi *p, const char *buf, size_t pos,
563: size_t sz, size_t *end, struct teximacro **macro)
1.1 kristaps 564: {
1.4 kristaps 565: size_t i, len, toksz;
1.1 kristaps 566:
567: assert('@' == buf[pos]);
568:
1.7 kristaps 569: if (NULL != macro)
570: *macro = NULL;
571:
1.1 kristaps 572: if ((*end = pos) == sz)
573: return(TEXICMD__MAX);
574: else if ((*end = ++pos) == sz)
575: return(TEXICMD__MAX);
576:
577: /* Alphabetic commands are special. */
578: if ( ! isalpha(buf[pos])) {
579: if ((*end = pos + 1) == sz)
580: return(TEXICMD__MAX);
581: for (i = 0; i < TEXICMD__MAX; i++) {
582: if (1 != texitoks[i].len)
583: continue;
584: if (0 == strncmp(texitoks[i].tok, &buf[pos], 1))
585: return(i);
586: }
587: texiwarn(p, "bad command: @%c", buf[pos]);
588: return(TEXICMD__MAX);
589: }
590:
1.4 kristaps 591: /* Scan to the end of the possible command name. */
1.1 kristaps 592: for (*end = pos; *end < sz && ! ismspace(buf[*end]); (*end)++)
593: if ((*end > pos && ('@' == buf[*end] ||
594: '{' == buf[*end] || '}' == buf[*end])))
595: break;
596:
1.4 kristaps 597: /* Look for the command. */
1.1 kristaps 598: len = *end - pos;
599: for (i = 0; i < TEXICMD__MAX; i++) {
600: if (len != texitoks[i].len)
601: continue;
602: if (0 == strncmp(texitoks[i].tok, &buf[pos], len))
603: return(i);
604: }
605:
1.4 kristaps 606: /* Look for it in our indices. */
607: for (i = 0; i < p->indexsz; i++) {
608: toksz = strlen(p->indexs[i]);
609: if (len != 5 + toksz)
610: continue;
611: if (strncmp(&buf[pos], p->indexs[i], toksz))
612: continue;
613: if (0 == strncmp(&buf[pos + toksz], "index", 5))
1.7 kristaps 614: return(TEXICMD_USER_INDEX);
615: }
616:
617: for (i = 0; i < p->macrosz; i++) {
618: if (len != strlen(p->macros[i].key))
619: continue;
620: if (strncmp(&buf[pos], p->macros[i].key, len))
621: continue;
622: if (NULL != macro)
623: *macro = &p->macros[i];
624: return(TEXICMD__MAX);
1.4 kristaps 625: }
626:
1.1 kristaps 627: texiwarn(p, "bad command: @%.*s", (int)len, &buf[pos]);
628: return(TEXICMD__MAX);
629: }
630:
631: /*
632: * Parse an argument from a bracketed command, e.g., @url{foo, baz}.
633: * Num should be set to the argument we're currently parsing, although
634: * it suffixes for it to be zero or non-zero.
635: * This will return 1 if there are more arguments, 0 otherwise.
636: * This will stop (returning 0) in the event of EOF or if we're not at a
637: * bracket for the zeroth parse.
638: */
639: int
640: parsearg(struct texi *p, const char *buf,
641: size_t sz, size_t *pos, size_t num)
642: {
1.7 kristaps 643: size_t end;
644: enum texicmd cmd;
645: struct teximacro *macro;
1.1 kristaps 646:
647: while (*pos < sz && ismspace(buf[*pos]))
648: advance(p, buf, pos);
649: if (*pos == sz || (0 == num && '{' != buf[*pos]))
650: return(0);
651: if (0 == num)
652: advance(p, buf, pos);
653:
654: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
655: switch (buf[*pos]) {
656: case (','):
657: advance(p, buf, pos);
658: return(1);
659: case ('}'):
660: advance(p, buf, pos);
661: return(0);
662: case ('{'):
663: if (0 == p->ign)
664: texiwarn(p, "unexpected \"{\"");
665: advance(p, buf, pos);
666: continue;
667: case ('@'):
668: break;
669: default:
670: texiword(p, buf, sz, pos, ',');
671: continue;
672: }
673:
1.7 kristaps 674: cmd = texicmd(p, buf, *pos, sz, &end, ¯o);
1.1 kristaps 675: advanceto(p, buf, pos, end);
1.7 kristaps 676: if (NULL != macro)
677: texiexecmacro(p, macro, buf, sz, pos);
1.1 kristaps 678: if (TEXICMD__MAX == cmd)
679: continue;
680: if (NULL != texitoks[cmd].fp)
681: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
682: }
683: return(0);
684: }
685:
686: /*
687: * Parse until the end of a bracketed statement, e.g., @foo{bar baz}.
688: * This will stop in the event of EOF or if we're not at a bracket.
689: */
690: void
691: parsebracket(struct texi *p, const char *buf, size_t sz, size_t *pos)
692: {
1.7 kristaps 693: size_t end;
694: enum texicmd cmd;
695: struct teximacro *macro;
1.1 kristaps 696:
697: while (*pos < sz && ismspace(buf[*pos]))
698: advance(p, buf, pos);
699:
700: if (*pos == sz || '{' != buf[*pos])
701: return;
702: advance(p, buf, pos);
703:
704: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
705: switch (buf[*pos]) {
706: case ('}'):
707: advance(p, buf, pos);
708: return;
709: case ('{'):
710: if (0 == p->ign)
711: texiwarn(p, "unexpected \"{\"");
712: advance(p, buf, pos);
713: continue;
714: case ('@'):
715: break;
716: default:
717: texiword(p, buf, sz, pos, '\0');
718: continue;
719: }
720:
1.7 kristaps 721: cmd = texicmd(p, buf, *pos, sz, &end, ¯o);
1.1 kristaps 722: advanceto(p, buf, pos, end);
1.7 kristaps 723: if (NULL != macro)
724: texiexecmacro(p, macro, buf, sz, pos);
1.1 kristaps 725: if (TEXICMD__MAX == cmd)
726: continue;
727: if (NULL != texitoks[cmd].fp)
728: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
729: }
730: }
731:
732: /*
733: * This should be invoked when we're on a macro line and want to process
734: * to the end of the current input line, doing all of our macros along
735: * the way.
736: */
737: void
738: parseeoln(struct texi *p, const char *buf, size_t sz, size_t *pos)
739: {
1.7 kristaps 740: size_t end;
741: enum texicmd cmd;
742: struct teximacro *macro;
1.1 kristaps 743:
744: while (*pos < sz && '\n' != buf[*pos]) {
745: while (*pos < sz && isws(buf[*pos])) {
746: p->seenws = 1;
747: if (p->literal)
748: texiputchar(p, buf[*pos]);
749: advance(p, buf, pos);
750: }
751: switch (buf[*pos]) {
752: case ('}'):
753: if (0 == p->ign)
754: texiwarn(p, "unexpected \"}\"");
755: advance(p, buf, pos);
756: continue;
757: case ('{'):
758: if (0 == p->ign)
759: texiwarn(p, "unexpected \"{\"");
760: advance(p, buf, pos);
761: continue;
762: case ('@'):
763: break;
764: default:
765: texiword(p, buf, sz, pos, '\0');
766: continue;
767: }
768:
1.7 kristaps 769: cmd = texicmd(p, buf, *pos, sz, &end, ¯o);
1.1 kristaps 770: advanceto(p, buf, pos, end);
1.7 kristaps 771: if (NULL != macro)
772: texiexecmacro(p, macro, buf, sz, pos);
1.1 kristaps 773: if (TEXICMD__MAX == cmd)
774: continue;
775: if (NULL != texitoks[cmd].fp)
776: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
777: }
778: }
779:
780: /*
781: * Parse a single word or command.
782: * This will return immediately at the EOF.
783: */
784: void
785: parsesingle(struct texi *p, const char *buf, size_t sz, size_t *pos)
786: {
1.7 kristaps 787: size_t end;
788: enum texicmd cmd;
789: struct teximacro *macro;
1.1 kristaps 790:
791: if ((*pos = advancenext(p, buf, sz, pos)) >= sz)
792: return;
793:
794: switch (buf[*pos]) {
795: case ('}'):
796: if (0 == p->ign)
797: texiwarn(p, "unexpected \"}\"");
798: advance(p, buf, pos);
799: return;
800: case ('{'):
801: if (0 == p->ign)
802: texiwarn(p, "unexpected \"{\"");
803: advance(p, buf, pos);
804: return;
805: case ('@'):
806: break;
807: default:
808: texiword(p, buf, sz, pos, '\0');
809: return;
810: }
811:
1.7 kristaps 812: cmd = texicmd(p, buf, *pos, sz, &end, ¯o);
1.1 kristaps 813: advanceto(p, buf, pos, end);
1.7 kristaps 814: if (NULL != macro)
815: texiexecmacro(p, macro, buf, sz, pos);
1.1 kristaps 816: if (TEXICMD__MAX == cmd)
817: return;
818: if (NULL != texitoks[cmd].fp)
819: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
820: }
821:
822: /*
823: * This is used in the @deffn type of command.
824: * These have an arbitrary number of line arguments; however, these
825: * arguments may or may not be surrounded by brackets.
826: * In this function, we parse each one as either a bracketed or
827: * non-bracketed argument, returning 0 when we've reached the end of
828: * line or 1 otherwise.
829: */
830: int
831: parselinearg(struct texi *p, const char *buf, size_t sz, size_t *pos)
832: {
833:
834: while (*pos < sz && isws(buf[*pos])) {
835: p->seenws = 1;
836: advance(p, buf, pos);
837: }
838:
839: if (*pos < sz && '{' == buf[*pos])
840: parsebracket(p, buf, sz, pos);
1.3 kristaps 841: else if (*pos < sz && '\n' != buf[*pos])
1.1 kristaps 842: parsesingle(p, buf, sz, pos);
843: else
844: return(0);
845:
846: return(1);
847: }
848:
/*
 * Parse the whole buffer, one word or command at a time, from its
 * beginning until its end.
 */
void
parseeof(struct texi *p, const char *buf, size_t sz)
{
	size_t	 cur;

	cur = 0;
	while (cur < sz)
		parsesingle(p, buf, sz, &cur);
}
860:
861: /*
1.8 kristaps 862: * This is like parseeof() except that it's to be invoked on memory
863: * buffers while parsing a larger scope.
864: * This is useful for parsing macro sequences.
865: * The line, column, and name of the calling file context are saved, the
866: * column and line reset, then all of these restored after parse.
867: */
868: void
869: parsemembuf(struct texi *p, const char *buf, size_t sz)
870: {
871: size_t svln, svcol;
872: const char *svname;
873:
874: svln = p->files[p->filepos - 1].line;
875: svcol = p->files[p->filepos - 1].col;
876: svname = p->files[p->filepos - 1].name;
877:
878: p->files[p->filepos - 1].line = 0;
879: p->files[p->filepos - 1].col = 0;
880: p->files[p->filepos - 1].name = "<macro buffer>";
881:
882: parseeof(p, buf, sz);
883:
884: p->files[p->filepos - 1].line = svln;
885: p->files[p->filepos - 1].col = svcol;
886: p->files[p->filepos - 1].name = svname;
887: }
888:
889: /*
1.1 kristaps 890: * Parse a block sequence until we have the "@end endtoken" command
891: * invocation.
892: * This will return immediately at EOF.
893: */
894: void
895: parseto(struct texi *p, const char *buf,
896: size_t sz, size_t *pos, const char *endtoken)
897: {
1.7 kristaps 898: size_t end;
899: enum texicmd cmd;
900: size_t endtoksz;
901: struct teximacro *macro;
1.1 kristaps 902:
903: endtoksz = strlen(endtoken);
904: assert(endtoksz > 0);
905:
906: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
907: switch (buf[*pos]) {
908: case ('}'):
909: if (0 == p->ign)
910: texiwarn(p, "unexpected \"}\"");
911: advance(p, buf, pos);
912: continue;
913: case ('{'):
914: if (0 == p->ign)
915: texiwarn(p, "unexpected \"{\"");
916: advance(p, buf, pos);
917: continue;
918: case ('@'):
919: break;
920: default:
921: texiword(p, buf, sz, pos, '\0');
922: continue;
923: }
924:
1.7 kristaps 925: cmd = texicmd(p, buf, *pos, sz, &end, ¯o);
1.1 kristaps 926: advanceto(p, buf, pos, end);
927: if (TEXICMD_END == cmd) {
928: while (*pos < sz && isws(buf[*pos]))
929: advance(p, buf, pos);
930: /*
931: * FIXME: check the full word, not just its
932: * initial substring!
933: */
934: if (sz - *pos >= endtoksz && 0 == strncmp
935: (&buf[*pos], endtoken, endtoksz)) {
936: advanceeoln(p, buf, sz, pos, 0);
937: break;
938: }
939: if (0 == p->ign)
940: texiwarn(p, "unexpected \"end\"");
941: advanceeoln(p, buf, sz, pos, 0);
942: continue;
1.7 kristaps 943: }
944: if (NULL != macro)
945: texiexecmacro(p, macro, buf, sz, pos);
946: if (TEXICMD__MAX == cmd)
947: continue;
948: if (NULL != texitoks[cmd].fp)
949: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
1.1 kristaps 950: }
951: }
952:
953: /*
954: * Memory-map the file "fname" and begin parsing it unless "parse" is
955: * zero, in which case we just dump the file to stdout (making sure it
956: * doesn't trip up mdoc(7) along the way).
957: * This can be called in a nested context.
958: */
959: void
960: parsefile(struct texi *p, const char *fname, int parse)
961: {
962: struct texifile *f;
963: int fd;
964: struct stat st;
965: size_t i;
966:
1.5 kristaps 967: if (64 == p->filepos)
1.6 kristaps 968: texierr(p, "too many open files");
1.1 kristaps 969: f = &p->files[p->filepos];
970: memset(f, 0, sizeof(struct texifile));
971:
972: f->name = fname;
973: if (-1 == (fd = open(fname, O_RDONLY, 0))) {
974: texiabort(p, fname);
975: } else if (-1 == fstat(fd, &st)) {
976: close(fd);
977: texiabort(p, fname);
978: }
979:
980: f->mapsz = st.st_size;
981: f->map = mmap(NULL, f->mapsz,
982: PROT_READ, MAP_SHARED, fd, 0);
983: close(fd);
984:
985: if (MAP_FAILED == f->map)
986: texiabort(p, fname);
987:
988: p->filepos++;
989: if ( ! parse) {
990: /*
991: * We're printing verbatim output.
992: * Make sure it doesn't get interpreted as mdoc by
993: * escaping escapes and making sure leading dots don't
994: * trigger mdoc(7) expansion.
995: */
996: for (i = 0; i < f->mapsz; i++) {
997: if (i > 0 && '.' == f->map[i])
998: if ('\n' == f->map[i - 1])
999: fputs("\\&", stdout);
1000: putchar(f->map[i]);
1001: if ('\\' == f->map[i])
1002: putchar('e');
1003: }
1004: } else
1005: parseeof(p, f->map, f->mapsz);
1006: texifilepop(p);
1007: }
1008:
1.2 kristaps 1009: /*
1010: * Look up the value to a stored pair's value starting in "buf" from
1011: * start to end.
1012: * Return the pointer to the value memory, which can be NULL if the
1013: * pointer key does not exist.
1014: * The pointer can point to NULL if the value has been unset.
1015: */
1016: static char **
1017: valuequery(const struct texi *p,
1018: const char *buf, size_t start, size_t end)
1019: {
1020: size_t i, sz, len;
1021:
1022: assert(end >= start);
1023: /* Ignore zero-length. */
1024: if (0 == (len = (end - start)))
1025: return(NULL);
1026: for (i = 0; i < p->valsz; i++) {
1027: sz = strlen(p->vals[i].key);
1028: if (sz != len)
1029: continue;
1030: if (0 == strncmp(p->vals[i].key, &buf[start], len))
1031: return(&p->vals[i].value);
1032: }
1033: return(NULL);
1034: }
1035:
/*
 * Parse a key until the end of line, e.g., @clear foo\n, and return the
 * pointer to its value slot via valuequery().
 * Leading blanks are skipped; the key is everything up to the newline,
 * which is consumed as well.
 * Returns NULL at the end of the buffer or if the key is unknown.
 */
static char **
valuelquery(struct texi *p, const char *buf, size_t sz, size_t *pos)
{
	const char	*nl;
	size_t		 keystart, keyend;

	for ( ; *pos < sz && isws(buf[*pos]); advance(p, buf, pos))
		continue;
	if (*pos == sz)
		return(NULL);

	/* The key runs to the newline or to the end of the buffer. */
	keystart = *pos;
	nl = memchr(&buf[keystart], '\n', sz - keystart);
	keyend = NULL == nl ? sz : (size_t)(nl - buf);

	advanceto(p, buf, pos, keyend);
	if (*pos < sz) {
		assert('\n' == buf[*pos]);
		advance(p, buf, pos);
	}

	return(valuequery(p, buf, keystart, keyend));
}
1062:
/*
 * Unset the value whose key occupies the rest of the input line: the
 * value string is freed and its slot set to NULL, while the key stays
 * in the table.
 * Unknown keys are silently ignored.
 */
void
valuelclear(struct texi *p, const char *buf, size_t sz, size_t *pos)
{
	char	**slot;

	slot = valuelquery(p, buf, sz, pos);
	if (NULL != slot) {
		free(*slot);
		*slot = NULL;
	}
}
1073:
/*
 * Look up the value whose key occupies the rest of the input line.
 * Returns NULL if the key is unknown or if its value has been unset.
 */
const char *
valuellookup(struct texi *p, const char *buf, size_t sz, size_t *pos)
{
	char	**slot;

	slot = valuelquery(p, buf, sz, pos);
	return(NULL == slot ? NULL : *slot);
}
1083:
/*
 * Parse a key from a bracketed string, e.g., @value{foo}, and return
 * its value.
 * Leading blanks are skipped and the closing bracket, if present, is
 * consumed.
 * If the returned pointer is NULL, either there was no string within
 * the brackets (or no brackets), or the value was not found, or the
 * value had previously been unset.
 */
const char *
valueblookup(struct texi *p, const char *buf, size_t sz, size_t *pos)
{
	const char	 *rbrace;
	char		**slot;
	size_t		  keystart, keyend;

	for ( ; *pos < sz && isws(buf[*pos]); advance(p, buf, pos))
		continue;
	if (*pos == sz || '{' != buf[*pos])
		return(NULL);
	advance(p, buf, pos);

	/* The key runs to the '}' or to the end of the buffer. */
	keystart = *pos;
	rbrace = memchr(&buf[keystart], '}', sz - keystart);
	keyend = NULL == rbrace ? sz : (size_t)(rbrace - buf);

	advanceto(p, buf, pos, keyend);
	if (*pos < sz) {
		assert('}' == buf[*pos]);
		advance(p, buf, pos);
	}

	slot = valuequery(p, buf, keystart, keyend);
	return(NULL == slot ? NULL : *slot);
}
1114:
1115: void
1116: valueadd(struct texi *p, char *key, char *val)
1117: {
1118: size_t i;
1119:
1120: assert(NULL != key);
1121: assert(NULL != val);
1122:
1123: for (i = 0; i < p->valsz; i++)
1124: if (0 == strcmp(p->vals[i].key, key))
1125: break;
1126:
1127: if (i < p->valsz) {
1128: free(key);
1129: free(p->vals[i].value);
1130: p->vals[i].value = val;
1131: } else {
1.4 kristaps 1132: /* FIXME: reallocarray() */
1.2 kristaps 1133: p->vals = realloc(p->vals,
1134: (p->valsz + 1) *
1135: sizeof(struct texivalue));
1.4 kristaps 1136: if (NULL == p->vals)
1137: texiabort(p, NULL);
1.2 kristaps 1138: p->vals[p->valsz].key = key;
1139: p->vals[p->valsz].value = val;
1140: p->valsz++;
1141: }
1.7 kristaps 1142: }
1143:
/*
 * Take the arguments to a macro, e.g., @foo{bar, baz, xyzzy} (or the
 * declaration form, @macro foo {arg1, ...}) and textually convert it to
 * an array of arguments whose size is stored in "argsz".
 * These need to be freed individually and as a whole.
 * If there is no opening bracket and "hint" is non-zero, the rest of
 * the line (newline consumed) becomes the single argument; with a
 * zero "hint", NULL and zero arguments are returned.
 * With a "hint" of one, commas do not separate arguments.
 * An empty "{}" yields zero arguments; any other empty argument and an
 * unterminated argument list are fatal errors.
 * NOTE: this will puke on @, or @} macros, which can trick it into
 * stopping argument parsing earlier.
 * Ergo, textual: this doesn't interpret the arguments in any way.
 */
char **
argparse(struct texi *p, const char *buf,
	size_t sz, size_t *pos, size_t *argsz, size_t hint)
{
	char	**args;
	size_t	  start, end, stack;
	int	  bracket;

	while (*pos < sz && isws(buf[*pos]))
		advance(p, buf, pos);

	args = NULL;
	*argsz = 0;

	/* Never look at buf[*pos] once the buffer is exhausted. */
	bracket = *pos < sz && '{' == buf[*pos];

	if ( ! bracket && hint) {
		/*
		 * Special case: if we encounter an unbracketed argument
		 * and we're being invoked with non-zero arguments
		 * (versus being set, i.e., hint>0), then parse until
		 * the end of line.
		 */
		args = calloc(1, sizeof(char *));
		if (NULL == args)
			texiabort(p, NULL);
		start = *pos;
		while (*pos < sz && '\n' != buf[*pos])
			advance(p, buf, pos);
		args[0] = malloc(*pos - start + 1);
		if (NULL == args[0])
			texiabort(p, NULL);
		memcpy(args[0], &buf[start], *pos - start);
		args[0][*pos - start] = '\0';
		*argsz = 1;
		/* If we stopped short of the end, it was at a newline. */
		if (*pos < sz)
			advance(p, buf, pos);
		return(args);
	} else if ( ! bracket)
		return(args);

	/* Parse til the closing '}', putting into the array. */
	advance(p, buf, pos);
	while (*pos < sz) {
		while (*pos < sz && isws(buf[*pos]))
			advance(p, buf, pos);
		start = *pos;
		stack = 0;
		while (*pos < sz) {
			/*
			 * According to the manual, commas within
			 * embedded commands are escaped.
			 * We keep track of embedded-ness in the "stack"
			 * state anyway, so this is free.
			 */
			if (',' == buf[*pos] && 0 == stack && 1 != hint)
				break;
			else if (0 == stack && '}' == buf[*pos])
				break;
			else if (0 != stack && '}' == buf[*pos])
				stack--;
			else if ('{' == buf[*pos])
				stack++;
			advance(p, buf, pos);
		}
		if (stack)
			texiwarn(p, "unterminated macro "
			    "in macro arguments");
		if ((end = *pos) == sz)
			break;
		/* Test for zero-length '{ }'. */
		if (start == end && '}' == buf[*pos] && 0 == *argsz)
			break;
		if (start == end)
			texierr(p, "zero-length argument");
		/* FIXME: use reallocarray. */
		args = realloc
			(args, sizeof(char *) *
			 (*argsz + 1));
		if (NULL == args)
			texiabort(p, NULL);
		args[*argsz] = malloc(end - start + 1);
		if (NULL == args[*argsz])
			texiabort(p, NULL);
		memcpy(args[*argsz],
			&buf[start], end - start);
		args[*argsz][end - start] = '\0';
		(*argsz)++;
		if ('}' == buf[*pos])
			break;
		/* Step over the comma to the next argument. */
		advance(p, buf, pos);
	}

	if (*pos == sz)
		texierr(p, "unterminated arguments");
	assert('}' == buf[*pos]);
	advance(p, buf, pos);
	return(args);
}
CVSweb