Annotation of texi2mdoc/util.c, Revision 1.7
1.7 ! kristaps 1: /* $Id: util.c,v 1.6 2015/02/21 22:01:32 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2015 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/mman.h>
18: #include <sys/stat.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <libgen.h>
25: #include <limits.h>
26: #include <stdarg.h>
27: #include <stdio.h>
28: #include <stdlib.h>
29: #include <string.h>
30: #include <time.h>
31: #include <unistd.h>
32:
33: #include "extern.h"
34:
35: /*
36: * Unmap the top-most file in the stack of files currently opened (that
37: * is, nested calls to parsefile()).
38: */
39: void
40: texifilepop(struct texi *p)
41: {
42: struct texifile *f;
43:
44: assert(p->filepos > 0);
45: f = &p->files[--p->filepos];
46: munmap(f->map, f->mapsz);
47: }
48:
1.7 ! kristaps 49: static void
! 50: teximacrofree(struct teximacro *p)
! 51: {
! 52: size_t i;
! 53:
! 54: for (i = 0; i < p->argsz; i++)
! 55: free(p->args[i]);
! 56:
! 57: free(p->args);
! 58: free(p->key);
! 59: free(p->value);
! 60: }
! 61:
! 62: static void
! 63: texivaluefree(struct texivalue *p)
! 64: {
! 65:
! 66: free(p->key);
! 67: free(p->value);
! 68: }
! 69:
1.1 kristaps 70: /*
71: * Unmap all files that we're currently using and free all resources
72: * that we've allocated during the parse.
73: * The utility should exit(...) after this is called.
74: */
75: void
76: texiexit(struct texi *p)
77: {
78: size_t i;
79:
80: /* Make sure we're newline-terminated. */
81: if (p->outcol)
82: putchar('\n');
83:
84: /* Unmap all files. */
85: while (p->filepos > 0)
86: texifilepop(p);
87:
1.7 ! kristaps 88: for (i = 0; i < p->macrosz; i++)
! 89: teximacrofree(&p->macros[i]);
1.1 kristaps 90: for (i = 0; i < p->dirsz; i++)
91: free(p->dirs[i]);
1.4 kristaps 92: for (i = 0; i < p->indexsz; i++)
93: free(p->indexs[i]);
1.7 ! kristaps 94: for (i = 0; i < p->valsz; i++)
! 95: texivaluefree(&p->vals[i]);
1.4 kristaps 96:
1.7 ! kristaps 97: free(p->macros);
1.1 kristaps 98: free(p->vals);
1.4 kristaps 99: free(p->indexs);
1.1 kristaps 100: free(p->dirs);
101: free(p->subtitle);
102: free(p->title);
103: }
104:
/*
 * Fatal system error.
 * Reports "errstring" through perror(3), releases everything with
 * texiexit(), and terminates with EXIT_FAILURE.
 * This function does not return.
 */
void
texiabort(struct texi *p, const char *errstring)
{
	/* Report first: the cleanup below may overwrite errno. */
	perror(errstring);

	texiexit(p);
	exit(EXIT_FAILURE);
}
117:
118: /*
119: * Print a generic warning message (to stderr) tied to our current
120: * location in the parse sequence.
121: */
122: void
123: texiwarn(const struct texi *p, const char *fmt, ...)
124: {
125: va_list ap;
126:
127: fprintf(stderr, "%s:%zu:%zu: warning: ",
128: p->files[p->filepos - 1].name,
129: p->files[p->filepos - 1].line + 1,
130: p->files[p->filepos - 1].col + 1);
131: va_start(ap, fmt);
132: vfprintf(stderr, fmt, ap);
133: va_end(ap);
134: fputc('\n', stderr);
135: }
136:
137: /*
138: * Print an error message (to stderr) tied to our current location in
139: * the parse sequence, invoke texiexit(), then die.
140: */
141: void
142: texierr(struct texi *p, const char *fmt, ...)
143: {
144: va_list ap;
145:
146: fprintf(stderr, "%s:%zu:%zu: error: ",
147: p->files[p->filepos - 1].name,
148: p->files[p->filepos - 1].line + 1,
149: p->files[p->filepos - 1].col + 1);
150: va_start(ap, fmt);
151: vfprintf(stderr, fmt, ap);
152: va_end(ap);
153: fputc('\n', stderr);
154: texiexit(p);
155: exit(EXIT_FAILURE);
156: }
157:
158: /*
159: * Put a single data character to the output if we're not ignoring.
160: * Makes sure we don't spurriously start a macro.
161: * Adjusts our output status.
162: * This shouldn't be called for macros: just for ordinary text.
163: */
164: void
165: texiputchar(struct texi *p, char c)
166: {
167:
168: if (p->ign)
169: return;
170:
171: if ('.' == c && 0 == p->outcol)
172: fputs("\\&", stdout);
173:
174: putchar(c);
175: p->seenvs = 0;
176: if ('\n' == c) {
177: p->outcol = 0;
178: p->seenws = 0;
179: } else
180: p->outcol++;
181: }
182:
/*
 * Write a NUL-terminated string of ordinary text, one character at a
 * time, through texiputchar().
 * This shouldn't be called for macros: just for ordinary text.
 */
void
texiputchars(struct texi *p, const char *s)
{
	const char	*cp;

	for (cp = s; *cp != '\0'; cp++)
		texiputchar(p, *cp);
}
194:
195: /*
196: * Close an mdoc(7) macro opened with teximacroopen().
197: * If there are no more macros on the line, prints a newline.
198: */
199: void
200: teximacroclose(struct texi *p)
201: {
202:
203: if (p->ign)
204: return;
205:
206: if (0 == --p->outmacro) {
207: putchar('\n');
208: p->outcol = p->seenws = 0;
209: }
210: }
211:
212: /*
213: * Open a mdoc(7) macro.
214: * This is used for line macros, e.g., Qq [foo bar baz].
215: * It can be invoked for nested macros, e.g., Qq Li foo .
216: * TODO: flush-right punctuation (e.g., parenthesis).
217: */
218: void
219: teximacroopen(struct texi *p, const char *s)
220: {
221: int rc;
222:
223: if (p->ign)
224: return;
225:
226: if (p->outcol && 0 == p->outmacro) {
227: putchar('\n');
228: p->outcol = 0;
229: }
230:
231: if (0 == p->outmacro)
232: putchar('.');
233: else
234: putchar(' ');
235:
236: if (EOF != (rc = fputs(s, stdout)))
237: p->outcol += rc;
238:
239: putchar(' ');
240: p->outcol++;
241: p->outmacro++;
242: p->seenws = 0;
243: }
244:
245: /*
246: * Put a stadnalone mdoc(7) command with the trailing newline.
247: */
248: void
249: teximacro(struct texi *p, const char *s)
250: {
251:
252: if (p->ign)
253: return;
254:
255: if (p->outmacro)
256: texierr(p, "\"%s\" in open line scope!?", s);
257: if (p->literal)
258: texierr(p, "\"%s\" in a literal scope!?", s);
259:
260: if (p->outcol)
261: putchar('\n');
262:
263: putchar('.');
264: puts(s);
265: p->outcol = p->seenws = 0;
266: }
267:
268: /*
269: * Introduce vertical space during normal (non-macro) input.
270: */
271: void
272: texivspace(struct texi *p)
273: {
274:
1.5 kristaps 275: if (p->seenvs || TEXILIST_TABLE == p->list)
1.1 kristaps 276: return;
277: teximacro(p, "Pp");
278: p->seenvs = 1;
279: }
280:
281: /*
282: * Advance by a single byte in the input stream, adjusting our location
283: * in the current input file.
284: */
285: void
286: advance(struct texi *p, const char *buf, size_t *pos)
287: {
288:
289: if ('\n' == buf[*pos]) {
290: p->files[p->filepos - 1].line++;
291: p->files[p->filepos - 1].col = 0;
292: } else
293: p->files[p->filepos - 1].col++;
294:
295: (*pos)++;
296: }
297:
298: /*
299: * It's common to wait punctuation to float on the right side of macro
300: * lines in mdoc(7), e.g., ".Em hello ) ."
301: * This function does so, and should be called before teximacroclose().
302: * It will detect that it's the last in the nested macros and
303: * appropriately flush-left punctuation alongside the macro.
304: */
305: void
306: texipunctuate(struct texi *p, const char *buf, size_t sz, size_t *pos)
307: {
308: size_t start, end;
309:
310: if (1 != p->outmacro)
311: return;
312:
313: for (start = end = *pos; end < sz; end++) {
314: switch (buf[end]) {
315: case (','):
316: case (')'):
317: case ('.'):
318: case ('"'):
319: case (':'):
320: case ('!'):
321: case ('?'):
322: continue;
323: default:
324: break;
325: }
326: break;
327: }
328: if (end == *pos)
329: return;
330: if (end + 1 == sz || ' ' == buf[end] || '\n' == buf[end]) {
331: for ( ; start < end; start++) {
332: texiputchar(p, ' ');
333: texiputchar(p, buf[start]);
334: advance(p, buf, pos);
335: }
336: }
337: }
338:
339: /*
340: * Advance to the next non-whitespace word in the input stream.
341: * If we're in literal mode, then print all of the whitespace as we're
342: * doing so.
343: */
344: static size_t
345: advancenext(struct texi *p, const char *buf, size_t sz, size_t *pos)
346: {
347:
348: if (p->literal) {
349: while (*pos < sz && ismspace(buf[*pos])) {
350: if (*pos && '\n' == buf[*pos] &&
351: '\\' == buf[*pos - 1])
352: texiputchar(p, 'e');
353: texiputchar(p, buf[*pos]);
354: advance(p, buf, pos);
355: }
356: return(*pos);
357: }
358:
359: while (*pos < sz && ismspace(buf[*pos])) {
360: p->seenws = 1;
361: /*
362: * If it looks like we've printed a double-line, then
363: * output a paragraph.
364: * FIXME: this is stupid.
365: */
366: if (*pos && '\n' == buf[*pos] && '\n' == buf[*pos - 1])
367: texivspace(p);
368: advance(p, buf, pos);
369: }
370: return(*pos);
371: }
372:
/*
 * Advance to the end of the current input line and return the new
 * position.
 * If "consumenl" is non-zero the newline itself is stepped over too.
 * NOTE: THIS SHOULD NOT BE CALLED ON BLANK TEXT, as it will read up to
 * the @\n.
 */
size_t
advanceeoln(struct texi *p, const char *buf,
	size_t sz, size_t *pos, int consumenl)
{
	for (;;) {
		/* Ran out of input before finding a newline. */
		if (*pos >= sz)
			return *pos;
		if (buf[*pos] == '\n')
			break;
		advance(p, buf, pos);
	}

	if (consumenl)
		advance(p, buf, pos);
	return *pos;
}
389:
/*
 * Advance byte by byte until the position equals "end", an absolute
 * position in the current buffer that must not lie before the current
 * position.
 */
void
advanceto(struct texi *p, const char *buf, size_t *pos, size_t end)
{
	assert(end >= *pos);

	for ( ; *pos < end; )
		advance(p, buf, pos);
}
402:
1.7 ! kristaps 403: static void
! 404: texiexecmacro(struct texi *p, struct teximacro *m,
! 405: const char *buf, size_t sz, size_t *pos)
! 406: {
! 407: size_t valsz, realsz, aasz, asz,
! 408: ssz, i, j, k, start, end;
! 409: char *val;
! 410: char **args;
! 411:
! 412: args = argparse(p, buf, sz, pos, &asz);
! 413: if (asz != m->argsz)
! 414: texiwarn(p, "invalid macro argument length");
! 415: aasz = asz < m->argsz ? asz : m->argsz;
! 416:
! 417: if (0 == aasz) {
! 418: parseeof(p, m->value, strlen(m->value));
! 419: return;
! 420: }
! 421:
! 422: valsz = realsz = strlen(m->value);
! 423: val = strdup(m->value);
! 424:
! 425: for (i = j = 0; i < realsz; i++) {
! 426: /* Parse blindly til the backslash delimiter. */
! 427: if ('\\' != m->value[i]) {
! 428: val[j++] = m->value[i];
! 429: val[j] = '\0';
! 430: continue;
! 431: } else if (i == realsz - 1)
! 432: texierr(p, "trailing argument name delimiter");
! 433:
! 434: /* Double-backslash is escaped. */
! 435: if ('\\' == m->value[i + 1]) {
! 436: val[j++] = m->value[i++];
! 437: val[j] = '\0';
! 438: continue;
! 439: }
! 440:
! 441: assert('\\' == m->value[i] && i < realsz - 1);
! 442:
! 443: /* Parse to terminating delimiter. */
! 444: /* FIXME: embedded, escaped delimiters? */
! 445: for (start = end = i + 1; end < realsz; end++)
! 446: if ('\\' == m->value[end])
! 447: break;
! 448: if (end == realsz)
! 449: texierr(p, "unterminated argument name");
! 450:
! 451: for (k = 0; k < aasz; k++) {
! 452: if ((ssz = strlen(m->args[k])) != (end - start))
! 453: continue;
! 454: if (strncmp(&m->value[start], m->args[k], ssz))
! 455: continue;
! 456: break;
! 457: }
! 458:
! 459: /*
! 460: * Argument didn't exist in argument table.
! 461: * No need to reallocate here: we just copy the text
! 462: * directly from the macro value into the buffer.
! 463: */
! 464: if (k == aasz) {
! 465: for ( ; i < end; i++)
! 466: val[j++] = m->value[i];
! 467: assert('\\' == m->value[i]);
! 468: val[j++] = m->value[i];
! 469: val[j] = '\0';
! 470: continue;
! 471: }
! 472:
! 473: if (strlen(args[k]) > ssz) {
! 474: valsz += strlen(args[k]);
! 475: val = realloc(val, valsz + 1);
! 476: if (NULL == val)
! 477: texiabort(p, NULL);
! 478: }
! 479:
! 480: j = strlcat(val, args[k], valsz + 1);
! 481: i = end;
! 482: }
! 483:
! 484: parseeof(p, val, strlen(val));
! 485:
! 486: for (i = 0; i < asz; i++)
! 487: free(args[i]);
! 488: free(args);
! 489: free(val);
! 490: }
! 491:
1.1 kristaps 492: /*
493: * Output a free-form word in the input stream, progressing to the next
494: * command or white-space.
495: * This also will advance the input stream.
496: */
497: static void
498: texiword(struct texi *p, const char *buf,
499: size_t sz, size_t *pos, char extra)
500: {
501:
502: if (p->seenws && 0 == p->outmacro &&
503: p->outcol > 72 && 0 == p->literal)
504: texiputchar(p, '\n');
505: /* FIXME: abstract this: we use it elsewhere. */
506: if (p->seenws && p->outcol && 0 == p->literal)
507: texiputchar(p, ' ');
508:
509: p->seenws = 0;
510:
511: while (*pos < sz && ! ismspace(buf[*pos])) {
512: switch (buf[*pos]) {
513: case ('@'):
514: case ('}'):
515: case ('{'):
516: return;
517: }
518: if ('\0' != extra && buf[*pos] == extra)
519: return;
520: if (*pos < sz - 1 &&
521: '`' == buf[*pos] &&
522: '`' == buf[*pos + 1]) {
523: texiputchars(p, "\\(lq");
524: advance(p, buf, pos);
525: } else if (*pos < sz - 1 &&
526: '\'' == buf[*pos] &&
527: '\'' == buf[*pos + 1]) {
528: texiputchars(p, "\\(rq");
529: advance(p, buf, pos);
530: } else
531: texiputchar(p, buf[*pos]);
532: advance(p, buf, pos);
533: }
534: }
535:
536: /*
537: * Look up the command at position "pos" in the buffer, returning it (or
538: * TEXICMD__MAX if none found) and setting "end" to be the absolute
539: * index after the command name.
540: */
541: enum texicmd
1.7 ! kristaps 542: texicmd(struct texi *p, const char *buf, size_t pos,
! 543: size_t sz, size_t *end, struct teximacro **macro)
1.1 kristaps 544: {
1.4 kristaps 545: size_t i, len, toksz;
1.1 kristaps 546:
547: assert('@' == buf[pos]);
548:
1.7 ! kristaps 549: if (NULL != macro)
! 550: *macro = NULL;
! 551:
1.1 kristaps 552: if ((*end = pos) == sz)
553: return(TEXICMD__MAX);
554: else if ((*end = ++pos) == sz)
555: return(TEXICMD__MAX);
556:
557: /* Alphabetic commands are special. */
558: if ( ! isalpha(buf[pos])) {
559: if ((*end = pos + 1) == sz)
560: return(TEXICMD__MAX);
561: for (i = 0; i < TEXICMD__MAX; i++) {
562: if (1 != texitoks[i].len)
563: continue;
564: if (0 == strncmp(texitoks[i].tok, &buf[pos], 1))
565: return(i);
566: }
567: texiwarn(p, "bad command: @%c", buf[pos]);
568: return(TEXICMD__MAX);
569: }
570:
1.4 kristaps 571: /* Scan to the end of the possible command name. */
1.1 kristaps 572: for (*end = pos; *end < sz && ! ismspace(buf[*end]); (*end)++)
573: if ((*end > pos && ('@' == buf[*end] ||
574: '{' == buf[*end] || '}' == buf[*end])))
575: break;
576:
1.4 kristaps 577: /* Look for the command. */
1.1 kristaps 578: len = *end - pos;
579: for (i = 0; i < TEXICMD__MAX; i++) {
580: if (len != texitoks[i].len)
581: continue;
582: if (0 == strncmp(texitoks[i].tok, &buf[pos], len))
583: return(i);
584: }
585:
1.4 kristaps 586: /* Look for it in our indices. */
587: for (i = 0; i < p->indexsz; i++) {
588: toksz = strlen(p->indexs[i]);
589: if (len != 5 + toksz)
590: continue;
591: if (strncmp(&buf[pos], p->indexs[i], toksz))
592: continue;
593: if (0 == strncmp(&buf[pos + toksz], "index", 5))
1.7 ! kristaps 594: return(TEXICMD_USER_INDEX);
! 595: }
! 596:
! 597: for (i = 0; i < p->macrosz; i++) {
! 598: if (len != strlen(p->macros[i].key))
! 599: continue;
! 600: if (strncmp(&buf[pos], p->macros[i].key, len))
! 601: continue;
! 602: if (NULL != macro)
! 603: *macro = &p->macros[i];
! 604: return(TEXICMD__MAX);
1.4 kristaps 605: }
606:
1.1 kristaps 607: texiwarn(p, "bad command: @%.*s", (int)len, &buf[pos]);
608: return(TEXICMD__MAX);
609: }
610:
611: /*
612: * Parse an argument from a bracketed command, e.g., @url{foo, baz}.
613: * Num should be set to the argument we're currently parsing, although
614: * it suffixes for it to be zero or non-zero.
615: * This will return 1 if there are more arguments, 0 otherwise.
616: * This will stop (returning 0) in the event of EOF or if we're not at a
617: * bracket for the zeroth parse.
618: */
619: int
620: parsearg(struct texi *p, const char *buf,
621: size_t sz, size_t *pos, size_t num)
622: {
1.7 ! kristaps 623: size_t end;
! 624: enum texicmd cmd;
! 625: struct teximacro *macro;
1.1 kristaps 626:
627: while (*pos < sz && ismspace(buf[*pos]))
628: advance(p, buf, pos);
629: if (*pos == sz || (0 == num && '{' != buf[*pos]))
630: return(0);
631: if (0 == num)
632: advance(p, buf, pos);
633:
634: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
635: switch (buf[*pos]) {
636: case (','):
637: advance(p, buf, pos);
638: return(1);
639: case ('}'):
640: advance(p, buf, pos);
641: return(0);
642: case ('{'):
643: if (0 == p->ign)
644: texiwarn(p, "unexpected \"{\"");
645: advance(p, buf, pos);
646: continue;
647: case ('@'):
648: break;
649: default:
650: texiword(p, buf, sz, pos, ',');
651: continue;
652: }
653:
1.7 ! kristaps 654: cmd = texicmd(p, buf, *pos, sz, &end, ¯o);
1.1 kristaps 655: advanceto(p, buf, pos, end);
1.7 ! kristaps 656: if (NULL != macro)
! 657: texiexecmacro(p, macro, buf, sz, pos);
1.1 kristaps 658: if (TEXICMD__MAX == cmd)
659: continue;
660: if (NULL != texitoks[cmd].fp)
661: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
662: }
663: return(0);
664: }
665:
666: /*
667: * Parse until the end of a bracketed statement, e.g., @foo{bar baz}.
668: * This will stop in the event of EOF or if we're not at a bracket.
669: */
670: void
671: parsebracket(struct texi *p, const char *buf, size_t sz, size_t *pos)
672: {
1.7 ! kristaps 673: size_t end;
! 674: enum texicmd cmd;
! 675: struct teximacro *macro;
1.1 kristaps 676:
677: while (*pos < sz && ismspace(buf[*pos]))
678: advance(p, buf, pos);
679:
680: if (*pos == sz || '{' != buf[*pos])
681: return;
682: advance(p, buf, pos);
683:
684: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
685: switch (buf[*pos]) {
686: case ('}'):
687: advance(p, buf, pos);
688: return;
689: case ('{'):
690: if (0 == p->ign)
691: texiwarn(p, "unexpected \"{\"");
692: advance(p, buf, pos);
693: continue;
694: case ('@'):
695: break;
696: default:
697: texiword(p, buf, sz, pos, '\0');
698: continue;
699: }
700:
1.7 ! kristaps 701: cmd = texicmd(p, buf, *pos, sz, &end, ¯o);
1.1 kristaps 702: advanceto(p, buf, pos, end);
1.7 ! kristaps 703: if (NULL != macro)
! 704: texiexecmacro(p, macro, buf, sz, pos);
1.1 kristaps 705: if (TEXICMD__MAX == cmd)
706: continue;
707: if (NULL != texitoks[cmd].fp)
708: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
709: }
710: }
711:
712: /*
713: * This should be invoked when we're on a macro line and want to process
714: * to the end of the current input line, doing all of our macros along
715: * the way.
716: */
717: void
718: parseeoln(struct texi *p, const char *buf, size_t sz, size_t *pos)
719: {
1.7 ! kristaps 720: size_t end;
! 721: enum texicmd cmd;
! 722: struct teximacro *macro;
1.1 kristaps 723:
724: while (*pos < sz && '\n' != buf[*pos]) {
725: while (*pos < sz && isws(buf[*pos])) {
726: p->seenws = 1;
727: if (p->literal)
728: texiputchar(p, buf[*pos]);
729: advance(p, buf, pos);
730: }
731: switch (buf[*pos]) {
732: case ('}'):
733: if (0 == p->ign)
734: texiwarn(p, "unexpected \"}\"");
735: advance(p, buf, pos);
736: continue;
737: case ('{'):
738: if (0 == p->ign)
739: texiwarn(p, "unexpected \"{\"");
740: advance(p, buf, pos);
741: continue;
742: case ('@'):
743: break;
744: default:
745: texiword(p, buf, sz, pos, '\0');
746: continue;
747: }
748:
1.7 ! kristaps 749: cmd = texicmd(p, buf, *pos, sz, &end, ¯o);
1.1 kristaps 750: advanceto(p, buf, pos, end);
1.7 ! kristaps 751: if (NULL != macro)
! 752: texiexecmacro(p, macro, buf, sz, pos);
1.1 kristaps 753: if (TEXICMD__MAX == cmd)
754: continue;
755: if (NULL != texitoks[cmd].fp)
756: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
757: }
758: }
759:
760: /*
761: * Parse a single word or command.
762: * This will return immediately at the EOF.
763: */
764: void
765: parsesingle(struct texi *p, const char *buf, size_t sz, size_t *pos)
766: {
1.7 ! kristaps 767: size_t end;
! 768: enum texicmd cmd;
! 769: struct teximacro *macro;
1.1 kristaps 770:
771: if ((*pos = advancenext(p, buf, sz, pos)) >= sz)
772: return;
773:
774: switch (buf[*pos]) {
775: case ('}'):
776: if (0 == p->ign)
777: texiwarn(p, "unexpected \"}\"");
778: advance(p, buf, pos);
779: return;
780: case ('{'):
781: if (0 == p->ign)
782: texiwarn(p, "unexpected \"{\"");
783: advance(p, buf, pos);
784: return;
785: case ('@'):
786: break;
787: default:
788: texiword(p, buf, sz, pos, '\0');
789: return;
790: }
791:
1.7 ! kristaps 792: cmd = texicmd(p, buf, *pos, sz, &end, ¯o);
1.1 kristaps 793: advanceto(p, buf, pos, end);
1.7 ! kristaps 794: if (NULL != macro)
! 795: texiexecmacro(p, macro, buf, sz, pos);
1.1 kristaps 796: if (TEXICMD__MAX == cmd)
797: return;
798: if (NULL != texitoks[cmd].fp)
799: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
800: }
801:
802: /*
803: * This is used in the @deffn type of command.
804: * These have an arbitrary number of line arguments; however, these
805: * arguments may or may not be surrounded by brackets.
806: * In this function, we parse each one as either a bracketed or
807: * non-bracketed argument, returning 0 when we've reached the end of
808: * line or 1 otherwise.
809: */
810: int
811: parselinearg(struct texi *p, const char *buf, size_t sz, size_t *pos)
812: {
813:
814: while (*pos < sz && isws(buf[*pos])) {
815: p->seenws = 1;
816: advance(p, buf, pos);
817: }
818:
819: if (*pos < sz && '{' == buf[*pos])
820: parsebracket(p, buf, sz, pos);
1.3 kristaps 821: else if (*pos < sz && '\n' != buf[*pos])
1.1 kristaps 822: parsesingle(p, buf, sz, pos);
823: else
824: return(0);
825:
826: return(1);
827: }
828:
/*
 * Parse the whole buffer, one word or command at a time, until its
 * end is reached.
 */
void
parseeof(struct texi *p, const char *buf, size_t sz)
{
	size_t	 pos = 0;

	while (pos < sz)
		parsesingle(p, buf, sz, &pos);
}
840:
841: /*
842: * Parse a block sequence until we have the "@end endtoken" command
843: * invocation.
844: * This will return immediately at EOF.
845: */
846: void
847: parseto(struct texi *p, const char *buf,
848: size_t sz, size_t *pos, const char *endtoken)
849: {
1.7 ! kristaps 850: size_t end;
! 851: enum texicmd cmd;
! 852: size_t endtoksz;
! 853: struct teximacro *macro;
1.1 kristaps 854:
855: endtoksz = strlen(endtoken);
856: assert(endtoksz > 0);
857:
858: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
859: switch (buf[*pos]) {
860: case ('}'):
861: if (0 == p->ign)
862: texiwarn(p, "unexpected \"}\"");
863: advance(p, buf, pos);
864: continue;
865: case ('{'):
866: if (0 == p->ign)
867: texiwarn(p, "unexpected \"{\"");
868: advance(p, buf, pos);
869: continue;
870: case ('@'):
871: break;
872: default:
873: texiword(p, buf, sz, pos, '\0');
874: continue;
875: }
876:
1.7 ! kristaps 877: cmd = texicmd(p, buf, *pos, sz, &end, ¯o);
1.1 kristaps 878: advanceto(p, buf, pos, end);
879: if (TEXICMD_END == cmd) {
880: while (*pos < sz && isws(buf[*pos]))
881: advance(p, buf, pos);
882: /*
883: * FIXME: check the full word, not just its
884: * initial substring!
885: */
886: if (sz - *pos >= endtoksz && 0 == strncmp
887: (&buf[*pos], endtoken, endtoksz)) {
888: advanceeoln(p, buf, sz, pos, 0);
889: break;
890: }
891: if (0 == p->ign)
892: texiwarn(p, "unexpected \"end\"");
893: advanceeoln(p, buf, sz, pos, 0);
894: continue;
1.7 ! kristaps 895: }
! 896: if (NULL != macro)
! 897: texiexecmacro(p, macro, buf, sz, pos);
! 898: if (TEXICMD__MAX == cmd)
! 899: continue;
! 900: if (NULL != texitoks[cmd].fp)
! 901: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
1.1 kristaps 902: }
903: }
904:
905: /*
906: * Memory-map the file "fname" and begin parsing it unless "parse" is
907: * zero, in which case we just dump the file to stdout (making sure it
908: * doesn't trip up mdoc(7) along the way).
909: * This can be called in a nested context.
910: */
911: void
912: parsefile(struct texi *p, const char *fname, int parse)
913: {
914: struct texifile *f;
915: int fd;
916: struct stat st;
917: size_t i;
918:
1.5 kristaps 919: if (64 == p->filepos)
1.6 kristaps 920: texierr(p, "too many open files");
1.1 kristaps 921: f = &p->files[p->filepos];
922: memset(f, 0, sizeof(struct texifile));
923:
924: f->name = fname;
925: if (-1 == (fd = open(fname, O_RDONLY, 0))) {
926: texiabort(p, fname);
927: } else if (-1 == fstat(fd, &st)) {
928: close(fd);
929: texiabort(p, fname);
930: }
931:
932: f->mapsz = st.st_size;
933: f->map = mmap(NULL, f->mapsz,
934: PROT_READ, MAP_SHARED, fd, 0);
935: close(fd);
936:
937: if (MAP_FAILED == f->map)
938: texiabort(p, fname);
939:
940: p->filepos++;
941: if ( ! parse) {
942: /*
943: * We're printing verbatim output.
944: * Make sure it doesn't get interpreted as mdoc by
945: * escaping escapes and making sure leading dots don't
946: * trigger mdoc(7) expansion.
947: */
948: for (i = 0; i < f->mapsz; i++) {
949: if (i > 0 && '.' == f->map[i])
950: if ('\n' == f->map[i - 1])
951: fputs("\\&", stdout);
952: putchar(f->map[i]);
953: if ('\\' == f->map[i])
954: putchar('e');
955: }
956: } else
957: parseeof(p, f->map, f->mapsz);
958: texifilepop(p);
959: }
960:
1.2 kristaps 961: /*
962: * Look up the value to a stored pair's value starting in "buf" from
963: * start to end.
964: * Return the pointer to the value memory, which can be NULL if the
965: * pointer key does not exist.
966: * The pointer can point to NULL if the value has been unset.
967: */
968: static char **
969: valuequery(const struct texi *p,
970: const char *buf, size_t start, size_t end)
971: {
972: size_t i, sz, len;
973:
974: assert(end >= start);
975: /* Ignore zero-length. */
976: if (0 == (len = (end - start)))
977: return(NULL);
978: for (i = 0; i < p->valsz; i++) {
979: sz = strlen(p->vals[i].key);
980: if (sz != len)
981: continue;
982: if (0 == strncmp(p->vals[i].key, &buf[start], len))
983: return(&p->vals[i].value);
984: }
985: return(NULL);
986: }
987:
/*
 * Parse a key that runs to the end of the line, e.g., @clear foo\n,
 * and return the pointer to its value via valuequery().
 * Leading blanks are skipped and the terminating newline, if there is
 * one, is consumed.
 * Returns NULL at the end of input or if the key is not known.
 */
static char **
valuelquery(struct texi *p, const char *buf, size_t sz, size_t *pos)
{
	size_t	 start, end;

	while (*pos < sz && isws(buf[*pos]))
		advance(p, buf, pos);
	if (*pos == sz)
		return NULL;

	/* The key is everything up to the newline or end of input. */
	start = *pos;
	for (end = start; end < sz && buf[end] != '\n'; end++)
		continue;

	advanceto(p, buf, pos, end);
	if (*pos < sz) {
		assert('\n' == buf[*pos]);
		advance(p, buf, pos);
	}

	return valuequery(p, buf, start, end);
}
1014:
/*
 * Unset the value whose key runs to the end of the current line: the
 * stored value is freed and replaced by NULL, the key itself stays.
 * Unknown keys are silently ignored.
 */
void
valuelclear(struct texi *p, const char *buf, size_t sz, size_t *pos)
{
	char	**slot;

	slot = valuelquery(p, buf, sz, pos);
	if (slot != NULL) {
		free(*slot);
		*slot = NULL;
	}
}
1025:
/*
 * Look up the value whose key runs to the end of the current line.
 * Returns NULL if the key is unknown or its value has been unset.
 */
const char *
valuellookup(struct texi *p, const char *buf, size_t sz, size_t *pos)
{
	char	**slot;

	slot = valuelquery(p, buf, sz, pos);
	return slot == NULL ? NULL : *slot;
}
1035:
/*
 * Parse a key from a bracketed string, e.g., @value{foo}, and return
 * its value.
 * Leading blanks are skipped and the closing bracket, if there is one,
 * is consumed.
 * Returns NULL if there were no brackets, nothing between them, the
 * key is unknown, or the value had previously been unset.
 */
const char *
valueblookup(struct texi *p, const char *buf, size_t sz, size_t *pos)
{
	size_t	  start, end;
	char	**slot;

	while (*pos < sz && isws(buf[*pos]))
		advance(p, buf, pos);
	if (*pos == sz || buf[*pos] != '{')
		return NULL;
	advance(p, buf, pos);

	/* The key is everything up to the '}' or end of input. */
	start = *pos;
	for (end = start; end < sz && buf[end] != '}'; end++)
		continue;

	advanceto(p, buf, pos, end);
	if (*pos < sz) {
		assert('}' == buf[*pos]);
		advance(p, buf, pos);
	}

	slot = valuequery(p, buf, start, end);
	return slot == NULL ? NULL : *slot;
}
1066:
1067: void
1068: valueadd(struct texi *p, char *key, char *val)
1069: {
1070: size_t i;
1071:
1072: assert(NULL != key);
1073: assert(NULL != val);
1074:
1075: for (i = 0; i < p->valsz; i++)
1076: if (0 == strcmp(p->vals[i].key, key))
1077: break;
1078:
1079: if (i < p->valsz) {
1080: free(key);
1081: free(p->vals[i].value);
1082: p->vals[i].value = val;
1083: } else {
1.4 kristaps 1084: /* FIXME: reallocarray() */
1.2 kristaps 1085: p->vals = realloc(p->vals,
1086: (p->valsz + 1) *
1087: sizeof(struct texivalue));
1.4 kristaps 1088: if (NULL == p->vals)
1089: texiabort(p, NULL);
1.2 kristaps 1090: p->vals[p->valsz].key = key;
1091: p->vals[p->valsz].value = val;
1092: p->valsz++;
1093: }
1.7 ! kristaps 1094: }
! 1095:
/*
 * Take the arguments to a macro, e.g., @foo{bar, baz, xyzzy} (or the
 * declaration form, @macro foo {arg1, ...}) and textually convert it to
 * an array of arguments of size "argsz".
 * These need to be freed individually and as a whole.
 * Returns NULL with "argsz" set to zero if the input (after leading
 * blanks) does not start with a bracket, or for an empty "{}".
 * A zero-length argument or a missing closing bracket is fatal.
 * NOTE: this will puke on @, or @} macros, which can trick it into
 * stopping argument parsing earlier.
 * Ergo, textual: this doesn't interpret the arguments in any way.
 */
char **
argparse(struct texi *p, const char *buf,
	size_t sz, size_t *pos, size_t *argsz)
{
	char	**args;
	size_t	  start, end, stack;

	while (*pos < sz && isws(buf[*pos]))
		advance(p, buf, pos);

	args = NULL;
	*argsz = 0;

	/*
	 * Check for no arguments.
	 * The end of the buffer counts as "no arguments" too; it must
	 * be tested first so that buf[*pos] is never read past the end.
	 */
	if (*pos == sz || '{' != buf[*pos])
		return(args);

	/* Parse til the closing '}', putting into the array. */
	advance(p, buf, pos);
	while (*pos < sz) {
		while (*pos < sz && isws(buf[*pos]))
			advance(p, buf, pos);
		start = *pos;
		stack = 0;
		while (*pos < sz) {
			/*
			 * According to the manual, commas within
			 * embedded commands are escaped.
			 * We keep track of embedded-ness in the "stack"
			 * state anyway, so this is free.
			 */
			if (0 == stack && ',' == buf[*pos])
				break;
			else if (0 == stack && '}' == buf[*pos])
				break;
			else if (0 != stack && '}' == buf[*pos])
				stack--;
			else if ('{' == buf[*pos])
				stack++;
			advance(p, buf, pos);
		}
		if (stack)
			texiwarn(p, "unterminated macro "
				"in macro arguments");
		if ((end = *pos) == sz)
			break;
		/* Test for zero-length '{  }'. */
		if (start == end && '}' == buf[*pos] && 0 == *argsz)
			break;
		if (start == end)
			texierr(p, "zero-length argument");
		/* FIXME: use reallocarray. */
		args = realloc
			(args, sizeof(char *) *
			 (*argsz + 1));
		if (NULL == args)
			texiabort(p, NULL);
		/* Copy the argument text out as a NUL-terminated string. */
		args[*argsz] = malloc(end - start + 1);
		if (NULL == args[*argsz])
			texiabort(p, NULL);
		memcpy(args[*argsz],
			&buf[start], end - start);
		args[*argsz][end - start] = '\0';
		(*argsz)++;
		if ('}' == buf[*pos])
			break;
		/* Step over the comma separating two arguments. */
		advance(p, buf, pos);
	}

	if (*pos == sz)
		texierr(p, "unterminated arguments");
	assert('}' == buf[*pos]);
	advance(p, buf, pos);
	return(args);
}
CVSweb