Annotation of texi2mdoc/util.c, Revision 1.9
1.9 ! kristaps 1: /* $Id: util.c,v 1.8 2015/02/23 11:56:39 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2015 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/mman.h>
18: #include <sys/stat.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <libgen.h>
25: #include <limits.h>
26: #include <stdarg.h>
27: #include <stdio.h>
28: #include <stdlib.h>
29: #include <string.h>
30: #include <time.h>
31: #include <unistd.h>
32:
33: #include "extern.h"
34:
35: /*
36: * Unmap the top-most file in the stack of files currently opened (that
37: * is, nested calls to parsefile()).
38: */
39: void
40: texifilepop(struct texi *p)
41: {
42: struct texifile *f;
43:
44: assert(p->filepos > 0);
45: f = &p->files[--p->filepos];
46: munmap(f->map, f->mapsz);
47: }
48:
1.7 kristaps 49: static void
50: teximacrofree(struct teximacro *p)
51: {
52: size_t i;
53:
54: for (i = 0; i < p->argsz; i++)
55: free(p->args[i]);
56:
57: free(p->args);
58: free(p->key);
59: free(p->value);
60: }
61:
62: static void
63: texivaluefree(struct texivalue *p)
64: {
65:
66: free(p->key);
67: free(p->value);
68: }
69:
1.1 kristaps 70: /*
71: * Unmap all files that we're currently using and free all resources
72: * that we've allocated during the parse.
73: * The utility should exit(...) after this is called.
74: */
75: void
76: texiexit(struct texi *p)
77: {
78: size_t i;
79:
80: /* Make sure we're newline-terminated. */
81: if (p->outcol)
82: putchar('\n');
83:
84: /* Unmap all files. */
85: while (p->filepos > 0)
86: texifilepop(p);
87:
1.7 kristaps 88: for (i = 0; i < p->macrosz; i++)
89: teximacrofree(&p->macros[i]);
1.1 kristaps 90: for (i = 0; i < p->dirsz; i++)
91: free(p->dirs[i]);
1.4 kristaps 92: for (i = 0; i < p->indexsz; i++)
93: free(p->indexs[i]);
1.7 kristaps 94: for (i = 0; i < p->valsz; i++)
95: texivaluefree(&p->vals[i]);
1.4 kristaps 96:
1.7 kristaps 97: free(p->macros);
1.1 kristaps 98: free(p->vals);
1.4 kristaps 99: free(p->indexs);
1.1 kristaps 100: free(p->dirs);
101: free(p->subtitle);
102: free(p->title);
103: }
104:
/*
 * Fatal system error.
 * Reports "errstring" through perror(3), releases all parser resources
 * with texiexit() and terminates the process with EXIT_FAILURE.
 * This function does not return.
 */
void
texiabort(struct texi *p, const char *errstring)
{
	/* Report first: perror(3) reads errno, so nothing may run before it. */
	perror(errstring);
	texiexit(p);
	exit(EXIT_FAILURE);
}
117:
118: /*
119: * Print a generic warning message (to stderr) tied to our current
120: * location in the parse sequence.
121: */
122: void
123: texiwarn(const struct texi *p, const char *fmt, ...)
124: {
125: va_list ap;
126:
127: fprintf(stderr, "%s:%zu:%zu: warning: ",
128: p->files[p->filepos - 1].name,
129: p->files[p->filepos - 1].line + 1,
130: p->files[p->filepos - 1].col + 1);
131: va_start(ap, fmt);
132: vfprintf(stderr, fmt, ap);
133: va_end(ap);
134: fputc('\n', stderr);
135: }
136:
137: /*
138: * Print an error message (to stderr) tied to our current location in
139: * the parse sequence, invoke texiexit(), then die.
140: */
141: void
142: texierr(struct texi *p, const char *fmt, ...)
143: {
144: va_list ap;
145:
146: fprintf(stderr, "%s:%zu:%zu: error: ",
147: p->files[p->filepos - 1].name,
148: p->files[p->filepos - 1].line + 1,
149: p->files[p->filepos - 1].col + 1);
150: va_start(ap, fmt);
151: vfprintf(stderr, fmt, ap);
152: va_end(ap);
153: fputc('\n', stderr);
154: texiexit(p);
155: exit(EXIT_FAILURE);
156: }
157:
158: /*
159: * Put a single data character to the output if we're not ignoring.
160: * Makes sure we don't spurriously start a macro.
161: * Adjusts our output status.
162: * This shouldn't be called for macros: just for ordinary text.
163: */
164: void
165: texiputchar(struct texi *p, char c)
166: {
167:
168: if (p->ign)
169: return;
170:
171: if ('.' == c && 0 == p->outcol)
172: fputs("\\&", stdout);
173:
174: putchar(c);
175: p->seenvs = 0;
176: if ('\n' == c) {
177: p->outcol = 0;
178: p->seenws = 0;
179: } else
180: p->outcol++;
181: }
182:
/*
 * Write a NUL-terminated string of ordinary text, one character at a
 * time, through texiputchar().
 * This shouldn't be called for macros: just for ordinary text.
 */
void
texiputchars(struct texi *p, const char *s)
{
	const char	*cp;

	for (cp = s; *cp != '\0'; cp++)
		texiputchar(p, *cp);
}
! 194:
/*
 * Write the bytes of "buf" in the half-open range [start, end) through
 * texiputchar(), following every backslash with an 'e' so that it is
 * emitted as the mdoc(7) escape "\e".
 */
void
texiputbuf(struct texi *p, const char *buf, size_t start, size_t end)
{
	size_t	 i;

	for (i = start; i < end; i++) {
		texiputchar(p, buf[i]);
		if (buf[i] == '\\')
			texiputchar(p, 'e');
	}
}
209:
210: /*
211: * Close an mdoc(7) macro opened with teximacroopen().
212: * If there are no more macros on the line, prints a newline.
213: */
214: void
215: teximacroclose(struct texi *p)
216: {
217:
218: if (p->ign)
219: return;
220:
221: if (0 == --p->outmacro) {
222: putchar('\n');
223: p->outcol = p->seenws = 0;
224: }
225: }
226:
227: /*
228: * Open a mdoc(7) macro.
229: * This is used for line macros, e.g., Qq [foo bar baz].
230: * It can be invoked for nested macros, e.g., Qq Li foo .
231: * TODO: flush-right punctuation (e.g., parenthesis).
232: */
233: void
234: teximacroopen(struct texi *p, const char *s)
235: {
236: int rc;
237:
238: if (p->ign)
239: return;
240:
241: if (p->outcol && 0 == p->outmacro) {
242: putchar('\n');
243: p->outcol = 0;
244: }
245:
246: if (0 == p->outmacro)
247: putchar('.');
248: else
249: putchar(' ');
250:
251: if (EOF != (rc = fputs(s, stdout)))
252: p->outcol += rc;
253:
254: putchar(' ');
255: p->outcol++;
256: p->outmacro++;
257: p->seenws = 0;
258: }
259:
260: /*
261: * Put a stadnalone mdoc(7) command with the trailing newline.
262: */
263: void
264: teximacro(struct texi *p, const char *s)
265: {
266:
267: if (p->ign)
268: return;
269:
270: if (p->outmacro)
271: texierr(p, "\"%s\" in open line scope!?", s);
272: if (p->literal)
273: texierr(p, "\"%s\" in a literal scope!?", s);
274:
275: if (p->outcol)
276: putchar('\n');
277:
278: putchar('.');
279: puts(s);
280: p->outcol = p->seenws = 0;
281: }
282:
283: /*
284: * Introduce vertical space during normal (non-macro) input.
285: */
286: void
287: texivspace(struct texi *p)
288: {
289:
1.5 kristaps 290: if (p->seenvs || TEXILIST_TABLE == p->list)
1.1 kristaps 291: return;
292: teximacro(p, "Pp");
293: p->seenvs = 1;
294: }
295:
296: /*
297: * Advance by a single byte in the input stream, adjusting our location
298: * in the current input file.
299: */
300: void
301: advance(struct texi *p, const char *buf, size_t *pos)
302: {
303:
304: if ('\n' == buf[*pos]) {
305: p->files[p->filepos - 1].line++;
306: p->files[p->filepos - 1].col = 0;
307: } else
308: p->files[p->filepos - 1].col++;
309:
310: (*pos)++;
311: }
312:
313: /*
314: * It's common to wait punctuation to float on the right side of macro
315: * lines in mdoc(7), e.g., ".Em hello ) ."
316: * This function does so, and should be called before teximacroclose().
317: * It will detect that it's the last in the nested macros and
318: * appropriately flush-left punctuation alongside the macro.
319: */
320: void
321: texipunctuate(struct texi *p, const char *buf, size_t sz, size_t *pos)
322: {
323: size_t start, end;
324:
325: if (1 != p->outmacro)
326: return;
327:
328: for (start = end = *pos; end < sz; end++) {
329: switch (buf[end]) {
330: case (','):
331: case (')'):
332: case ('.'):
333: case ('"'):
334: case (':'):
335: case ('!'):
336: case ('?'):
337: continue;
338: default:
339: break;
340: }
341: break;
342: }
343: if (end == *pos)
344: return;
345: if (end + 1 == sz || ' ' == buf[end] || '\n' == buf[end]) {
346: for ( ; start < end; start++) {
347: texiputchar(p, ' ');
348: texiputchar(p, buf[start]);
349: advance(p, buf, pos);
350: }
351: }
352: }
353:
354: /*
355: * Advance to the next non-whitespace word in the input stream.
356: * If we're in literal mode, then print all of the whitespace as we're
357: * doing so.
358: */
359: static size_t
360: advancenext(struct texi *p, const char *buf, size_t sz, size_t *pos)
361: {
362:
363: if (p->literal) {
364: while (*pos < sz && ismspace(buf[*pos])) {
365: if (*pos && '\n' == buf[*pos] &&
366: '\\' == buf[*pos - 1])
367: texiputchar(p, 'e');
368: texiputchar(p, buf[*pos]);
369: advance(p, buf, pos);
370: }
371: return(*pos);
372: }
373:
374: while (*pos < sz && ismspace(buf[*pos])) {
375: p->seenws = 1;
376: /*
377: * If it looks like we've printed a double-line, then
378: * output a paragraph.
379: * FIXME: this is stupid.
380: */
381: if (*pos && '\n' == buf[*pos] && '\n' == buf[*pos - 1])
382: texivspace(p);
383: advance(p, buf, pos);
384: }
385: return(*pos);
386: }
387:
/*
 * Advance to the next newline in the input (or to the end of the
 * buffer), stepping over the newline as well if "consumenl" is
 * non-zero.
 * Returns the new position, which is also stored in "*pos".
 * NOTE: THIS SHOULD NOT BE CALLED ON BLANK TEXT, as it will read up to
 * the @\n.
 */
size_t
advanceeoln(struct texi *p, const char *buf,
	size_t sz, size_t *pos, int consumenl)
{
	for ( ; *pos < sz; advance(p, buf, pos))
		if (buf[*pos] == '\n')
			break;

	if (consumenl && *pos < sz)
		advance(p, buf, pos);

	return *pos;
}
404:
/*
 * Advance byte by byte (so that line and column tracking stay
 * correct) until "*pos" equals "end", an absolute position in the
 * current buffer that must not lie before the current position.
 */
void
advanceto(struct texi *p, const char *buf, size_t *pos, size_t end)
{
	assert(*pos <= end);

	for (;;) {
		if (*pos >= end)
			break;
		advance(p, buf, pos);
	}
}
417:
1.7 kristaps 418: static void
419: texiexecmacro(struct texi *p, struct teximacro *m,
420: const char *buf, size_t sz, size_t *pos)
421: {
422: size_t valsz, realsz, aasz, asz,
423: ssz, i, j, k, start, end;
424: char *val;
425: char **args;
426:
1.8 kristaps 427: args = argparse(p, buf, sz, pos, &asz, m->argsz);
1.7 kristaps 428: if (asz != m->argsz)
429: texiwarn(p, "invalid macro argument length");
430: aasz = asz < m->argsz ? asz : m->argsz;
431:
432: if (0 == aasz) {
1.8 kristaps 433: parsemembuf(p, m->value, strlen(m->value));
1.7 kristaps 434: return;
435: }
436:
437: valsz = realsz = strlen(m->value);
438: val = strdup(m->value);
439:
440: for (i = j = 0; i < realsz; i++) {
441: /* Parse blindly til the backslash delimiter. */
442: if ('\\' != m->value[i]) {
443: val[j++] = m->value[i];
444: val[j] = '\0';
445: continue;
446: } else if (i == realsz - 1)
447: texierr(p, "trailing argument name delimiter");
448:
449: /* Double-backslash is escaped. */
450: if ('\\' == m->value[i + 1]) {
451: val[j++] = m->value[i++];
452: val[j] = '\0';
453: continue;
454: }
455:
456: assert('\\' == m->value[i] && i < realsz - 1);
457:
458: /* Parse to terminating delimiter. */
459: /* FIXME: embedded, escaped delimiters? */
460: for (start = end = i + 1; end < realsz; end++)
461: if ('\\' == m->value[end])
462: break;
463: if (end == realsz)
464: texierr(p, "unterminated argument name");
465:
466: for (k = 0; k < aasz; k++) {
467: if ((ssz = strlen(m->args[k])) != (end - start))
468: continue;
469: if (strncmp(&m->value[start], m->args[k], ssz))
470: continue;
471: break;
472: }
473:
474: /*
475: * Argument didn't exist in argument table.
476: * No need to reallocate here: we just copy the text
477: * directly from the macro value into the buffer.
478: */
479: if (k == aasz) {
480: for ( ; i < end; i++)
481: val[j++] = m->value[i];
482: assert('\\' == m->value[i]);
483: val[j++] = m->value[i];
484: val[j] = '\0';
485: continue;
486: }
487:
488: if (strlen(args[k]) > ssz) {
489: valsz += strlen(args[k]);
490: val = realloc(val, valsz + 1);
491: if (NULL == val)
492: texiabort(p, NULL);
493: }
494:
495: j = strlcat(val, args[k], valsz + 1);
496: i = end;
497: }
498:
1.8 kristaps 499: parsemembuf(p, val, strlen(val));
1.7 kristaps 500:
501: for (i = 0; i < asz; i++)
502: free(args[i]);
503: free(args);
504: free(val);
505: }
506:
1.1 kristaps 507: /*
508: * Output a free-form word in the input stream, progressing to the next
509: * command or white-space.
510: * This also will advance the input stream.
511: */
512: static void
513: texiword(struct texi *p, const char *buf,
514: size_t sz, size_t *pos, char extra)
515: {
516:
517: if (p->seenws && 0 == p->outmacro &&
518: p->outcol > 72 && 0 == p->literal)
519: texiputchar(p, '\n');
520: /* FIXME: abstract this: we use it elsewhere. */
521: if (p->seenws && p->outcol && 0 == p->literal)
522: texiputchar(p, ' ');
523:
524: p->seenws = 0;
525:
526: while (*pos < sz && ! ismspace(buf[*pos])) {
527: switch (buf[*pos]) {
528: case ('@'):
529: case ('}'):
530: case ('{'):
531: return;
532: }
533: if ('\0' != extra && buf[*pos] == extra)
534: return;
535: if (*pos < sz - 1 &&
536: '`' == buf[*pos] &&
537: '`' == buf[*pos + 1]) {
538: texiputchars(p, "\\(lq");
539: advance(p, buf, pos);
540: } else if (*pos < sz - 1 &&
541: '\'' == buf[*pos] &&
542: '\'' == buf[*pos + 1]) {
543: texiputchars(p, "\\(rq");
544: advance(p, buf, pos);
545: } else
546: texiputchar(p, buf[*pos]);
547: advance(p, buf, pos);
548: }
549: }
550:
551: /*
552: * Look up the command at position "pos" in the buffer, returning it (or
553: * TEXICMD__MAX if none found) and setting "end" to be the absolute
554: * index after the command name.
555: */
556: enum texicmd
1.7 kristaps 557: texicmd(struct texi *p, const char *buf, size_t pos,
558: size_t sz, size_t *end, struct teximacro **macro)
1.1 kristaps 559: {
1.4 kristaps 560: size_t i, len, toksz;
1.1 kristaps 561:
562: assert('@' == buf[pos]);
563:
1.7 kristaps 564: if (NULL != macro)
565: *macro = NULL;
566:
1.1 kristaps 567: if ((*end = pos) == sz)
568: return(TEXICMD__MAX);
569: else if ((*end = ++pos) == sz)
570: return(TEXICMD__MAX);
571:
572: /* Alphabetic commands are special. */
573: if ( ! isalpha(buf[pos])) {
574: if ((*end = pos + 1) == sz)
575: return(TEXICMD__MAX);
576: for (i = 0; i < TEXICMD__MAX; i++) {
577: if (1 != texitoks[i].len)
578: continue;
579: if (0 == strncmp(texitoks[i].tok, &buf[pos], 1))
580: return(i);
581: }
582: texiwarn(p, "bad command: @%c", buf[pos]);
583: return(TEXICMD__MAX);
584: }
585:
1.4 kristaps 586: /* Scan to the end of the possible command name. */
1.1 kristaps 587: for (*end = pos; *end < sz && ! ismspace(buf[*end]); (*end)++)
588: if ((*end > pos && ('@' == buf[*end] ||
589: '{' == buf[*end] || '}' == buf[*end])))
590: break;
591:
1.4 kristaps 592: /* Look for the command. */
1.1 kristaps 593: len = *end - pos;
594: for (i = 0; i < TEXICMD__MAX; i++) {
595: if (len != texitoks[i].len)
596: continue;
597: if (0 == strncmp(texitoks[i].tok, &buf[pos], len))
598: return(i);
599: }
600:
1.4 kristaps 601: /* Look for it in our indices. */
602: for (i = 0; i < p->indexsz; i++) {
603: toksz = strlen(p->indexs[i]);
604: if (len != 5 + toksz)
605: continue;
606: if (strncmp(&buf[pos], p->indexs[i], toksz))
607: continue;
608: if (0 == strncmp(&buf[pos + toksz], "index", 5))
1.7 kristaps 609: return(TEXICMD_USER_INDEX);
610: }
611:
612: for (i = 0; i < p->macrosz; i++) {
613: if (len != strlen(p->macros[i].key))
614: continue;
615: if (strncmp(&buf[pos], p->macros[i].key, len))
616: continue;
617: if (NULL != macro)
618: *macro = &p->macros[i];
619: return(TEXICMD__MAX);
1.4 kristaps 620: }
621:
1.1 kristaps 622: texiwarn(p, "bad command: @%.*s", (int)len, &buf[pos]);
623: return(TEXICMD__MAX);
624: }
625:
626: /*
627: * Parse an argument from a bracketed command, e.g., @url{foo, baz}.
628: * Num should be set to the argument we're currently parsing, although
629: * it suffixes for it to be zero or non-zero.
630: * This will return 1 if there are more arguments, 0 otherwise.
631: * This will stop (returning 0) in the event of EOF or if we're not at a
632: * bracket for the zeroth parse.
633: */
634: int
635: parsearg(struct texi *p, const char *buf,
636: size_t sz, size_t *pos, size_t num)
637: {
1.7 kristaps 638: size_t end;
639: enum texicmd cmd;
640: struct teximacro *macro;
1.1 kristaps 641:
642: while (*pos < sz && ismspace(buf[*pos]))
643: advance(p, buf, pos);
644: if (*pos == sz || (0 == num && '{' != buf[*pos]))
645: return(0);
646: if (0 == num)
647: advance(p, buf, pos);
648:
649: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
650: switch (buf[*pos]) {
651: case (','):
652: advance(p, buf, pos);
653: return(1);
654: case ('}'):
655: advance(p, buf, pos);
656: return(0);
657: case ('{'):
658: if (0 == p->ign)
659: texiwarn(p, "unexpected \"{\"");
660: advance(p, buf, pos);
661: continue;
662: case ('@'):
663: break;
664: default:
665: texiword(p, buf, sz, pos, ',');
666: continue;
667: }
668:
1.7 kristaps 669: cmd = texicmd(p, buf, *pos, sz, &end, ¯o);
1.1 kristaps 670: advanceto(p, buf, pos, end);
1.7 kristaps 671: if (NULL != macro)
672: texiexecmacro(p, macro, buf, sz, pos);
1.1 kristaps 673: if (TEXICMD__MAX == cmd)
674: continue;
675: if (NULL != texitoks[cmd].fp)
676: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
677: }
678: return(0);
679: }
680:
681: /*
682: * Parse until the end of a bracketed statement, e.g., @foo{bar baz}.
683: * This will stop in the event of EOF or if we're not at a bracket.
684: */
685: void
686: parsebracket(struct texi *p, const char *buf, size_t sz, size_t *pos)
687: {
1.7 kristaps 688: size_t end;
689: enum texicmd cmd;
690: struct teximacro *macro;
1.1 kristaps 691:
692: while (*pos < sz && ismspace(buf[*pos]))
693: advance(p, buf, pos);
694:
695: if (*pos == sz || '{' != buf[*pos])
696: return;
697: advance(p, buf, pos);
698:
699: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
700: switch (buf[*pos]) {
701: case ('}'):
702: advance(p, buf, pos);
703: return;
704: case ('{'):
705: if (0 == p->ign)
706: texiwarn(p, "unexpected \"{\"");
707: advance(p, buf, pos);
708: continue;
709: case ('@'):
710: break;
711: default:
712: texiword(p, buf, sz, pos, '\0');
713: continue;
714: }
715:
1.7 kristaps 716: cmd = texicmd(p, buf, *pos, sz, &end, ¯o);
1.1 kristaps 717: advanceto(p, buf, pos, end);
1.7 kristaps 718: if (NULL != macro)
719: texiexecmacro(p, macro, buf, sz, pos);
1.1 kristaps 720: if (TEXICMD__MAX == cmd)
721: continue;
722: if (NULL != texitoks[cmd].fp)
723: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
724: }
725: }
726:
727: /*
728: * This should be invoked when we're on a macro line and want to process
729: * to the end of the current input line, doing all of our macros along
730: * the way.
731: */
732: void
733: parseeoln(struct texi *p, const char *buf, size_t sz, size_t *pos)
734: {
1.7 kristaps 735: size_t end;
736: enum texicmd cmd;
737: struct teximacro *macro;
1.1 kristaps 738:
739: while (*pos < sz && '\n' != buf[*pos]) {
740: while (*pos < sz && isws(buf[*pos])) {
741: p->seenws = 1;
742: if (p->literal)
743: texiputchar(p, buf[*pos]);
744: advance(p, buf, pos);
745: }
746: switch (buf[*pos]) {
747: case ('}'):
748: if (0 == p->ign)
749: texiwarn(p, "unexpected \"}\"");
750: advance(p, buf, pos);
751: continue;
752: case ('{'):
753: if (0 == p->ign)
754: texiwarn(p, "unexpected \"{\"");
755: advance(p, buf, pos);
756: continue;
757: case ('@'):
758: break;
759: default:
760: texiword(p, buf, sz, pos, '\0');
761: continue;
762: }
763:
1.7 kristaps 764: cmd = texicmd(p, buf, *pos, sz, &end, ¯o);
1.1 kristaps 765: advanceto(p, buf, pos, end);
1.7 kristaps 766: if (NULL != macro)
767: texiexecmacro(p, macro, buf, sz, pos);
1.1 kristaps 768: if (TEXICMD__MAX == cmd)
769: continue;
770: if (NULL != texitoks[cmd].fp)
771: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
772: }
773: }
774:
775: /*
776: * Parse a single word or command.
777: * This will return immediately at the EOF.
778: */
779: void
780: parsesingle(struct texi *p, const char *buf, size_t sz, size_t *pos)
781: {
1.7 kristaps 782: size_t end;
783: enum texicmd cmd;
784: struct teximacro *macro;
1.1 kristaps 785:
786: if ((*pos = advancenext(p, buf, sz, pos)) >= sz)
787: return;
788:
789: switch (buf[*pos]) {
790: case ('}'):
791: if (0 == p->ign)
792: texiwarn(p, "unexpected \"}\"");
793: advance(p, buf, pos);
794: return;
795: case ('{'):
796: if (0 == p->ign)
797: texiwarn(p, "unexpected \"{\"");
798: advance(p, buf, pos);
799: return;
800: case ('@'):
801: break;
802: default:
803: texiword(p, buf, sz, pos, '\0');
804: return;
805: }
806:
1.7 kristaps 807: cmd = texicmd(p, buf, *pos, sz, &end, ¯o);
1.1 kristaps 808: advanceto(p, buf, pos, end);
1.7 kristaps 809: if (NULL != macro)
810: texiexecmacro(p, macro, buf, sz, pos);
1.1 kristaps 811: if (TEXICMD__MAX == cmd)
812: return;
813: if (NULL != texitoks[cmd].fp)
814: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
815: }
816:
817: /*
818: * This is used in the @deffn type of command.
819: * These have an arbitrary number of line arguments; however, these
820: * arguments may or may not be surrounded by brackets.
821: * In this function, we parse each one as either a bracketed or
822: * non-bracketed argument, returning 0 when we've reached the end of
823: * line or 1 otherwise.
824: */
825: int
826: parselinearg(struct texi *p, const char *buf, size_t sz, size_t *pos)
827: {
828:
829: while (*pos < sz && isws(buf[*pos])) {
830: p->seenws = 1;
831: advance(p, buf, pos);
832: }
833:
834: if (*pos < sz && '{' == buf[*pos])
835: parsebracket(p, buf, sz, pos);
1.3 kristaps 836: else if (*pos < sz && '\n' != buf[*pos])
1.1 kristaps 837: parsesingle(p, buf, sz, pos);
838: else
839: return(0);
840:
841: return(1);
842: }
843:
/*
 * Parse the whole buffer, one word or command at a time, starting at
 * its first byte.
 */
void
parseeof(struct texi *p, const char *buf, size_t sz)
{
	size_t	 cur;

	cur = 0;
	while (cur < sz)
		parsesingle(p, buf, sz, &cur);
}
855:
856: /*
1.8 kristaps 857: * This is like parseeof() except that it's to be invoked on memory
858: * buffers while parsing a larger scope.
859: * This is useful for parsing macro sequences.
860: * The line, column, and name of the calling file context are saved, the
861: * column and line reset, then all of these restored after parse.
862: */
863: void
864: parsemembuf(struct texi *p, const char *buf, size_t sz)
865: {
866: size_t svln, svcol;
867: const char *svname;
868:
869: svln = p->files[p->filepos - 1].line;
870: svcol = p->files[p->filepos - 1].col;
871: svname = p->files[p->filepos - 1].name;
872:
873: p->files[p->filepos - 1].line = 0;
874: p->files[p->filepos - 1].col = 0;
875: p->files[p->filepos - 1].name = "<macro buffer>";
876:
877: parseeof(p, buf, sz);
878:
879: p->files[p->filepos - 1].line = svln;
880: p->files[p->filepos - 1].col = svcol;
881: p->files[p->filepos - 1].name = svname;
882: }
883:
884: /*
1.1 kristaps 885: * Parse a block sequence until we have the "@end endtoken" command
886: * invocation.
887: * This will return immediately at EOF.
888: */
889: void
890: parseto(struct texi *p, const char *buf,
891: size_t sz, size_t *pos, const char *endtoken)
892: {
1.7 kristaps 893: size_t end;
894: enum texicmd cmd;
895: size_t endtoksz;
896: struct teximacro *macro;
1.1 kristaps 897:
898: endtoksz = strlen(endtoken);
899: assert(endtoksz > 0);
900:
901: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
902: switch (buf[*pos]) {
903: case ('}'):
904: if (0 == p->ign)
905: texiwarn(p, "unexpected \"}\"");
906: advance(p, buf, pos);
907: continue;
908: case ('{'):
909: if (0 == p->ign)
910: texiwarn(p, "unexpected \"{\"");
911: advance(p, buf, pos);
912: continue;
913: case ('@'):
914: break;
915: default:
916: texiword(p, buf, sz, pos, '\0');
917: continue;
918: }
919:
1.7 kristaps 920: cmd = texicmd(p, buf, *pos, sz, &end, ¯o);
1.1 kristaps 921: advanceto(p, buf, pos, end);
922: if (TEXICMD_END == cmd) {
923: while (*pos < sz && isws(buf[*pos]))
924: advance(p, buf, pos);
925: /*
926: * FIXME: check the full word, not just its
927: * initial substring!
928: */
929: if (sz - *pos >= endtoksz && 0 == strncmp
930: (&buf[*pos], endtoken, endtoksz)) {
931: advanceeoln(p, buf, sz, pos, 0);
932: break;
933: }
934: if (0 == p->ign)
935: texiwarn(p, "unexpected \"end\"");
936: advanceeoln(p, buf, sz, pos, 0);
937: continue;
1.7 kristaps 938: }
939: if (NULL != macro)
940: texiexecmacro(p, macro, buf, sz, pos);
941: if (TEXICMD__MAX == cmd)
942: continue;
943: if (NULL != texitoks[cmd].fp)
944: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
1.1 kristaps 945: }
946: }
947:
948: /*
949: * Memory-map the file "fname" and begin parsing it unless "parse" is
950: * zero, in which case we just dump the file to stdout (making sure it
951: * doesn't trip up mdoc(7) along the way).
952: * This can be called in a nested context.
953: */
954: void
955: parsefile(struct texi *p, const char *fname, int parse)
956: {
957: struct texifile *f;
958: int fd;
959: struct stat st;
960: size_t i;
961:
1.5 kristaps 962: if (64 == p->filepos)
1.6 kristaps 963: texierr(p, "too many open files");
1.1 kristaps 964: f = &p->files[p->filepos];
965: memset(f, 0, sizeof(struct texifile));
966:
967: f->name = fname;
968: if (-1 == (fd = open(fname, O_RDONLY, 0))) {
969: texiabort(p, fname);
970: } else if (-1 == fstat(fd, &st)) {
971: close(fd);
972: texiabort(p, fname);
973: }
974:
975: f->mapsz = st.st_size;
976: f->map = mmap(NULL, f->mapsz,
977: PROT_READ, MAP_SHARED, fd, 0);
978: close(fd);
979:
980: if (MAP_FAILED == f->map)
981: texiabort(p, fname);
982:
983: p->filepos++;
984: if ( ! parse) {
985: /*
986: * We're printing verbatim output.
987: * Make sure it doesn't get interpreted as mdoc by
988: * escaping escapes and making sure leading dots don't
989: * trigger mdoc(7) expansion.
990: */
991: for (i = 0; i < f->mapsz; i++) {
992: if (i > 0 && '.' == f->map[i])
993: if ('\n' == f->map[i - 1])
994: fputs("\\&", stdout);
995: putchar(f->map[i]);
996: if ('\\' == f->map[i])
997: putchar('e');
998: }
999: } else
1000: parseeof(p, f->map, f->mapsz);
1001: texifilepop(p);
1002: }
1003:
1.2 kristaps 1004: /*
1005: * Look up the value to a stored pair's value starting in "buf" from
1006: * start to end.
1007: * Return the pointer to the value memory, which can be NULL if the
1008: * pointer key does not exist.
1009: * The pointer can point to NULL if the value has been unset.
1010: */
1011: static char **
1012: valuequery(const struct texi *p,
1013: const char *buf, size_t start, size_t end)
1014: {
1015: size_t i, sz, len;
1016:
1017: assert(end >= start);
1018: /* Ignore zero-length. */
1019: if (0 == (len = (end - start)))
1020: return(NULL);
1021: for (i = 0; i < p->valsz; i++) {
1022: sz = strlen(p->vals[i].key);
1023: if (sz != len)
1024: continue;
1025: if (0 == strncmp(p->vals[i].key, &buf[start], len))
1026: return(&p->vals[i].value);
1027: }
1028: return(NULL);
1029: }
1030:
/*
 * Parse a key until the end of line, e.g., @clear foo\n, and return the
 * pointer to its value slot via valuequery().
 * Leading blanks are skipped; the key runs up to the newline (or the
 * end of the buffer) and the newline, if any, is consumed.
 * Returns NULL at the end of the buffer or if the key is unknown.
 */
static char **
valuelquery(struct texi *p, const char *buf, size_t sz, size_t *pos)
{
	size_t	 keystart, keyend;

	while (*pos < sz && isws(buf[*pos]))
		advance(p, buf, pos);
	if (*pos == sz)
		return NULL;

	keystart = *pos;
	keyend = keystart;
	while (keyend < sz && buf[keyend] != '\n')
		keyend++;

	advanceto(p, buf, pos, keyend);
	if (*pos < sz) {
		assert(buf[*pos] == '\n');
		advance(p, buf, pos);
	}

	return valuequery(p, buf, keystart, keyend);
}
1057:
/*
 * Unset the value whose key runs to the end of the current line: the
 * stored value is freed and its slot set to NULL.
 * Nothing happens if the key is not found.
 */
void
valuelclear(struct texi *p, const char *buf, size_t sz, size_t *pos)
{
	char	**slot;

	slot = valuelquery(p, buf, sz, pos);
	if (slot == NULL)
		return;

	free(*slot);
	*slot = NULL;
}
1068:
/*
 * Return the value stored for the key that runs to the end of the
 * current line.
 * The result is NULL if the key is not found or its value has been
 * unset.
 */
const char *
valuellookup(struct texi *p, const char *buf, size_t sz, size_t *pos)
{
	char	**slot;

	slot = valuelquery(p, buf, sz, pos);
	return slot == NULL ? NULL : *slot;
}
1078:
/*
 * Parse a key from a bracketed string, e.g., @value{foo}, and return
 * the pointer to its value.
 * Leading blanks are skipped and both brackets are consumed.
 * If the returned pointer is NULL, either there was no string within
 * the brackets (or no brackets), or the value was not found, or the
 * value had previously been unset.
 */
const char *
valueblookup(struct texi *p, const char *buf, size_t sz, size_t *pos)
{
	size_t	  keystart, keyend;
	char	**slot;

	while (*pos < sz && isws(buf[*pos]))
		advance(p, buf, pos);
	if (*pos == sz || buf[*pos] != '{')
		return NULL;
	advance(p, buf, pos);

	/* The key runs up to the closing bracket. */
	keystart = *pos;
	keyend = keystart;
	while (keyend < sz && buf[keyend] != '}')
		keyend++;

	advanceto(p, buf, pos, keyend);
	if (*pos < sz) {
		assert(buf[*pos] == '}');
		advance(p, buf, pos);
	}

	slot = valuequery(p, buf, keystart, keyend);
	return slot == NULL ? NULL : *slot;
}
1109:
1110: void
1111: valueadd(struct texi *p, char *key, char *val)
1112: {
1113: size_t i;
1114:
1115: assert(NULL != key);
1116: assert(NULL != val);
1117:
1118: for (i = 0; i < p->valsz; i++)
1119: if (0 == strcmp(p->vals[i].key, key))
1120: break;
1121:
1122: if (i < p->valsz) {
1123: free(key);
1124: free(p->vals[i].value);
1125: p->vals[i].value = val;
1126: } else {
1.4 kristaps 1127: /* FIXME: reallocarray() */
1.2 kristaps 1128: p->vals = realloc(p->vals,
1129: (p->valsz + 1) *
1130: sizeof(struct texivalue));
1.4 kristaps 1131: if (NULL == p->vals)
1132: texiabort(p, NULL);
1.2 kristaps 1133: p->vals[p->valsz].key = key;
1134: p->vals[p->valsz].value = val;
1135: p->valsz++;
1136: }
1.7 kristaps 1137: }
1138:
/*
 * Take the arguments to a macro, e.g., @foo{bar, baz, xyzzy} (or the
 * declaration form, @macro foo {arg1, ...}) and textually convert it to
 * an array of arguments of size "argsz".
 * These need to be freed individually and as a whole.
 * Returns NULL with "argsz" set to zero if there is no opening bracket
 * (including at the end of the buffer) or the brackets are empty.
 * If "hint" is 1, commas do not separate arguments: everything up to
 * the closing bracket is a single argument.
 * An empty argument or a missing closing bracket is a fatal error.
 * NOTE: this will puke on @, or @} macros, which can trick it into
 * stopping argument parsing earlier.
 * Ergo, textual: this doesn't interpret the arguments in any way.
 */
char **
argparse(struct texi *p, const char *buf,
	size_t sz, size_t *pos, size_t *argsz, size_t hint)
{
	char	**args;
	size_t	  start, end, stack;

	while (*pos < sz && isws(buf[*pos]))
		advance(p, buf, pos);

	args = NULL;
	*argsz = 0;

	/* Check for no arguments (without reading past the buffer). */
	if (*pos == sz || '{' != buf[*pos])
		return(args);

	/* Parse til the closing '}', putting into the array. */
	advance(p, buf, pos);
	while (*pos < sz) {
		while (*pos < sz && isws(buf[*pos]))
			advance(p, buf, pos);
		start = *pos;
		stack = 0;
		while (*pos < sz) {
			/*
			 * According to the manual, commas within
			 * embedded commands are escaped.
			 * We keep track of embedded-ness in the "stack"
			 * state anyway, so this is free.
			 */
			if (',' == buf[*pos] && 0 == stack && 1 != hint)
				break;
			else if (0 == stack && '}' == buf[*pos])
				break;
			else if (0 != stack && '}' == buf[*pos])
				stack--;
			else if ('{' == buf[*pos])
				stack++;
			advance(p, buf, pos);
		}
		if (stack)
			texiwarn(p, "unterminated macro "
				"in macro arguments");
		if ((end = *pos) == sz)
			break;
		/* Test for zero-length '{ }'. */
		if (start == end && '}' == buf[*pos] && 0 == *argsz)
			break;
		if (start == end)
			texierr(p, "zero-length argument");
		/* FIXME: use reallocarray. */
		args = realloc
			(args, sizeof(char *) *
			 (*argsz + 1));
		if (NULL == args)
			texiabort(p, NULL);
		args[*argsz] = malloc(end - start + 1);
		if (NULL == args[*argsz])
			texiabort(p, NULL);
		memcpy(args[*argsz],
			&buf[start], end - start);
		args[*argsz][end - start] = '\0';
		(*argsz)++;
		if ('}' == buf[*pos])
			break;
		/* Step over the comma separating the arguments. */
		advance(p, buf, pos);
	}

	if (*pos == sz)
		texierr(p, "unterminated arguments");
	assert('}' == buf[*pos]);
	advance(p, buf, pos);
	return(args);
}
CVSweb