Annotation of texi2mdoc/main.c, Revision 1.2
1.2 ! kristaps 1: /* $Id: main.c,v 1.1.1.1 2015/02/16 22:24:43 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2015 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
#include <sys/mman.h>
#include <sys/stat.h>

#include <assert.h>
#include <ctype.h>
#include <fcntl.h>
#include <getopt.h>
#include <libgen.h>
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
30:
/*
 * Every native Texinfo command understood by the parser.
 * The enumerators are used as indices into the texitoks[] table, so
 * both must list the commands in the same order.
 * TEXICMD__MAX is the number of commands; the command lookup also
 * returns it when a command is not recognised.
 * Overridden (user-defined) names are not covered here.
 */
enum	texicmd {
	TEXICMD_ACRONYM = 0,	/* @acronym */
	TEXICMD_A4PAPER,	/* @afourpaper */
	TEXICMD_ANCHOR,		/* @anchor */
	TEXICMD_APPENDIX,	/* @appendix */
	TEXICMD_APPENDIXSEC,	/* @appendixsec */
	TEXICMD_AT,		/* @@ */
	TEXICMD_BYE,		/* @bye */
	TEXICMD_CHAPTER,	/* @chapter */
	TEXICMD_CINDEX,		/* @cindex */
	TEXICMD_CODE,		/* @code */
	TEXICMD_COMMAND,	/* @command */
	TEXICMD_COMMENT,	/* @c */
	TEXICMD_COMMENT_LONG,	/* @comment */
	TEXICMD_CONTENTS,	/* @contents */
	TEXICMD_COPYING,	/* @copying */
	TEXICMD_COPYRIGHT,	/* @copyright */
	TEXICMD_DETAILMENU,	/* @detailmenu */
	TEXICMD_DIRCATEGORY,	/* @dircategory */
	TEXICMD_DIRENTRY,	/* @direntry */
	TEXICMD_DOTS,		/* @dots */
	TEXICMD_EMAIL,		/* @email */
	TEXICMD_EMPH,		/* @emph */
	TEXICMD_END,		/* @end */
	TEXICMD_ENUMERATE,	/* @enumerate */
	TEXICMD_EXAMPLE,	/* @example */
	TEXICMD_FILE,		/* @file */
	TEXICMD_HEADING,	/* @heading */
	TEXICMD_I,		/* @i */
	TEXICMD_IFHTML,		/* @ifhtml */
	TEXICMD_IFNOTTEX,	/* @ifnottex */
	TEXICMD_IFTEX,		/* @iftex */
	TEXICMD_IMAGE,		/* @image */
	TEXICMD_INCLUDE,	/* @include */
	TEXICMD_ITEM,		/* @item */
	TEXICMD_ITEMIZE,	/* @itemize */
	TEXICMD_KBD,		/* @kbd */
	TEXICMD_LATEX,		/* @LaTeX */
	TEXICMD_MENU,		/* @menu */
	TEXICMD_NODE,		/* @node */
	TEXICMD_QUOTATION,	/* @quotation */
	TEXICMD_PARINDENT,	/* @paragraphindent */
	TEXICMD_PRINTINDEX,	/* @printindex */
	TEXICMD_REF,		/* @ref */
	TEXICMD_SAMP,		/* @samp */
	TEXICMD_SECTION,	/* @section */
	TEXICMD_SETCHAPNEWPAGE,	/* @setchapternewpage */
	TEXICMD_SETFILENAME,	/* @setfilename */
	TEXICMD_SETTITLE,	/* @settitle */
	TEXICMD_SUBSECTION,	/* @subsection */
	TEXICMD_TABLE,		/* @table */
	TEXICMD_TEX,		/* @tex */
	TEXICMD_TEXSYM,		/* @TeX */
	TEXICMD_TITLEFONT,	/* @titlefont */
	TEXICMD_TITLEPAGE,	/* @titlepage */
	TEXICMD_TOP,		/* @top */
	TEXICMD_UNNUMBERED,	/* @unnumbered */
	TEXICMD_UNNUMBEREDSEC,	/* @unnumberedsec */
	TEXICMD_URL,		/* @url */
	TEXICMD_VAR,		/* @var */
	TEXICMD__MAX
};
98:
/*
 * One input file on the parse stack, together with the parser's
 * current position inside it (used when printing diagnostics).
 */
struct	texifile {
	const char	*name;	/* file name as handed to the parser */
	size_t		 line;	/* current line, counted from zero */
	size_t		 col;	/* current column in line, from zero */
	char		*map;	/* start of the mmap(2)'d contents */
	size_t		 mapsz;	/* length of the mapping in bytes */
};
110:
struct	texi;

/*
 * Signature shared by all functions implementing a Texinfo command.
 * A handler is invoked with the parse state, the command that was
 * matched, the input buffer and its size, and a pointer to the current
 * offset in that buffer (just past the command name), which the
 * handler moves forward as it consumes input.
 */
typedef	void (*texicmdfp)(struct texi *p, enum texicmd cmd,
	const char *buf, size_t sz, size_t *pos);
118:
119: /*
120: * Describes Texinfo commands, whether native or overriden.
121: */
122: struct texitok {
123: texicmdfp fp; /* callback (or NULL if none) */
124: const char *tok; /* name of the token */
125: size_t len; /* strlen(tok) */
126: };
127:
128: /*
129: * The main parse structure.
130: * This keeps any necessary information handy.
131: */
132: struct texi {
133: struct texifile files[64];
134: size_t filepos;
135: unsigned flags;
136: #define TEXI_IGN 0x01 /* don't print anything */
137: #define TEXI_HEADER (TEXI_IGN | 0x02) /* haven't seen @top yet */
138: #define TEXI_LITERAL 0x04 /* output all whitespace */
139: size_t outcol; /* column of output */
140: int outmacro; /* whether output is in line macro */
141: int seenws; /* whitespace has been ignored */
1.2 ! kristaps 142: char *dir; /* texi directory */
1.1 kristaps 143: };
144:
/*
 * Character classes used by the parser.
 * ismpunct(): punctuation that is moved onto the end of a macro line.
 * isws(): horizontal white-space, that is a blank or a tab.
 * Both evaluate their argument more than once.
 * FIXME: these should eventually be replaced.
 */
#define	ismpunct(_x) \
	((_x) == '.' || (_x) == ',' || (_x) == ';')
#define	isws(_x) \
	((_x) == ' ' || (_x) == '\t')
1.1 kristaps 152:
/*
 * Forward declarations of the command handlers referenced by the
 * texitoks[] table; all of them have the texicmdfp signature.
 */
static	void doarg1(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos);
static	void dobracket(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos);
static	void dobye(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos);
static	void docommand(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos);
static	void doemph(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos);
static	void doenumerate(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos);
static	void doexample(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos);
static	void dofile(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos);
static	void doifnottex(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos);
static	void doignblock(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos);
static	void doignbracket(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos);
static	void doignline(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos);
static	void doinclude(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos);
static	void doitalic(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos);
static	void doitem(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos);
static	void doitemize(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos);
static	void doliteral(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos);
static	void doquotation(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos);
static	void dotable(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos);
static	void dotop(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos);
static	void dosection(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos);
static	void dosh(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos);
static	void dosubsection(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos);
static	void dosymbol(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos);
177:
178: static const struct texitok texitoks[TEXICMD__MAX] = {
1.2 ! kristaps 179: { doarg1, "acronym", 7 }, /* TEXICMD_ACRONYM */
1.1 kristaps 180: { doignline, "afourpaper", 10 }, /* TEXICMD_A4PAPER */
181: { doignbracket, "anchor", 6 }, /* TEXICMD_ANCHOR */
1.2 ! kristaps 182: { dosh, "appendix", 8 }, /* TEXICMD_APPENDIX */
! 183: { dosh, "appendixsec", 11 }, /* TEXICMD_APPENDIXSEC */
1.1 kristaps 184: { dosymbol, "@", 1 }, /* TEXICMD_AT */
185: { dobye, "bye", 3 }, /* TEXICMD_BYE */
186: { dosh, "chapter", 7 }, /* TEXICMD_CHAPTER */
187: { doignline, "cindex", 6 }, /* TEXICMD_CINDEX */
188: { doliteral, "code", 4 }, /* TEXICMD_CODE */
189: { docommand, "command", 7 }, /* TEXICMD_COMMAND */
190: { doignline, "c", 1 }, /* TEXICMD_COMMENT */
1.2 ! kristaps 191: { doignline, "comment", 7 }, /* TEXICMD_COMMENT_LONG */
1.1 kristaps 192: { doignline, "contents", 8 }, /* TEXICMD_CONTENTS */
193: { doignblock, "copying", 7 }, /* TEXICMD_COPYING */
194: { dosymbol, "copyright", 9 }, /* TEXICMD_COPYRIGHT */
195: { doignblock, "detailmenu", 10 }, /* TEXICMD_DETAILMENU */
196: { doignline, "dircategory", 11 }, /* TEXICMD_DIRCATEGORY */
197: { doignblock, "direntry", 8 }, /* TEXICMD_DIRENTRY */
1.2 ! kristaps 198: { dosymbol, "dots", 4 }, /* TEXICMD_DOTS */
1.1 kristaps 199: { doarg1, "email", 5 }, /* TEXICMD_EMAIL */
200: { doemph, "emph", 4 }, /* TEXICMD_EMPH */
201: { NULL, "end", 3 }, /* TEXICMD_END */
1.2 ! kristaps 202: { doenumerate, "enumerate", 9 }, /* TEXICMD_ENUMERATE */
1.1 kristaps 203: { doexample, "example", 7 }, /* TEXICMD_EXAMPLE */
204: { dofile, "file", 4 }, /* TEXICMD_FILE */
1.2 ! kristaps 205: { dosection, "heading", 7 }, /* TEXICMD_HEADING */
1.1 kristaps 206: { doitalic, "i", 1 }, /* TEXICMD_I */
207: { doignblock, "ifhtml", 6 }, /* TEXICMD_IFHTML */
208: { doifnottex, "ifnottex", 8 }, /* TEXICMD_IFNOTTEX */
209: { doignblock, "iftex", 5 }, /* TEXICMD_IFTEX */
210: { doignbracket, "image", 5 }, /* TEXICMD_IMAGE */
1.2 ! kristaps 211: { doinclude, "include", 7 }, /* TEXICMD_INCLUDE */
1.1 kristaps 212: { doitem, "item", 4 }, /* TEXICMD_ITEM */
213: { doitemize, "itemize", 7 }, /* TEXICMD_ITEMIZE */
214: { doliteral, "kbd", 3 }, /* TEXICMD_KBD */
215: { dosymbol, "LaTeX", 5 }, /* TEXICMD_LATEX */
216: { doignblock, "menu", 4 }, /* TEXICMD_MENU */
217: { doignline, "node", 4 }, /* TEXICMD_NODE */
218: { doquotation, "quotation", 9 }, /* TEXICMD_QUOTATION */
1.2 ! kristaps 219: { doignline, "printindex", 10 }, /* TEXICMD_PRINTINDEX */
1.1 kristaps 220: { doignline, "paragraphindent", 14 }, /* TEXICMD_PARINDENT */
221: { dobracket, "ref", 3 }, /* TEXICMD_REF */
222: { doliteral, "samp", 4 }, /* TEXICMD_SAMP */
223: { dosection, "section", 7 }, /* TEXICMD_SECTION */
224: { doignline, "setchapternewpage", 17 }, /* TEXICMD_SETCHAPNEWPAGE */
225: { doignline, "setfilename", 11 }, /* TEXICMD_SETFILENAME */
226: { doignline, "settitle", 8 }, /* TEXICMD_SETTITLE */
227: { dosubsection, "subsection", 10 }, /* TEXICMD_SUBSECTION */
228: { dotable, "table", 5 }, /* TEXICMD_TABLE */
229: { doignblock, "tex", 3 }, /* TEXICMD_TEX */
230: { dosymbol, "TeX", 3 }, /* TEXICMD_TEXSYM */
231: { dobracket, "titlefont", 9 }, /* TEXICMD_TITLEFONT */
232: { doignblock, "titlepage", 9 }, /* TEXICMD_TITLEPAGE */
233: { dotop, "top", 3 }, /* TEXICMD_TOP */
234: { dosh, "unnumbered", 10 }, /* TEXICMD_UNNUMBERED */
1.2 ! kristaps 235: { dosection, "unnumberedsec", 13 }, /* TEXICMD_UNNUMBEREDSEC */
1.1 kristaps 236: { doarg1, "url", 3 }, /* TEXICMD_URL */
237: { doliteral, "var", 3 }, /* TEXICMD_VAR */
238: };
239:
1.2 ! kristaps 240: /*
! 241: * Unmap the top-most file that we're using.
! 242: */
1.1 kristaps 243: static void
244: texifilepop(struct texi *p)
245: {
246: struct texifile *f;
247:
248: assert(p->filepos > 0);
249: f = &p->files[--p->filepos];
250: munmap(f->map, f->mapsz);
251: }
252:
1.2 ! kristaps 253: /*
! 254: * Unmap all files that we're currently using.
! 255: * The utility should exit(...) after this is called.
! 256: */
1.1 kristaps 257: static void
258: texiexit(struct texi *p)
259: {
260:
261: while (p->filepos > 0)
262: texifilepop(p);
1.2 ! kristaps 263: free(p->dir);
1.1 kristaps 264: }
265:
/*
 * Bail out after a failed system call.
 * "errstring" is handed to perror(3), so errno must still describe the
 * failure; then all resources are released and the process exits with
 * EXIT_FAILURE.  Does not return.
 */
static void
texiabort(struct texi *p, const char *errstring)
{

	perror(errstring);
	texiexit(p);

	exit(EXIT_FAILURE);
}
278:
279: /*
280: * Print a generic warning message (to stderr) tied to our current
281: * location in the parse sequence.
282: */
283: static void
284: texiwarn(const struct texi *p, const char *fmt, ...)
285: {
286: va_list ap;
287:
1.2 ! kristaps 288: fprintf(stderr, "%s:%zu:%zu: warning: ",
1.1 kristaps 289: p->files[p->filepos - 1].name,
290: p->files[p->filepos - 1].line + 1,
291: p->files[p->filepos - 1].col + 1);
292: va_start(ap, fmt);
293: vfprintf(stderr, fmt, ap);
294: va_end(ap);
295: fputc('\n', stderr);
296: }
297:
1.2 ! kristaps 298: static void
! 299: texierr(struct texi *p, const char *fmt, ...)
! 300: {
! 301: va_list ap;
! 302:
! 303: fprintf(stderr, "%s:%zu:%zu: error: ",
! 304: p->files[p->filepos - 1].name,
! 305: p->files[p->filepos - 1].line + 1,
! 306: p->files[p->filepos - 1].col + 1);
! 307: va_start(ap, fmt);
! 308: vfprintf(stderr, fmt, ap);
! 309: va_end(ap);
! 310: fputc('\n', stderr);
! 311: texiexit(p);
! 312: exit(EXIT_FAILURE);
! 313: }
! 314:
1.1 kristaps 315: /*
316: * Put a single data character.
317: * This MUST NOT be a mdoc(7) command: it should be free text that's
318: * outputted to the screen.
319: */
320: static void
321: texiputchar(struct texi *p, char c)
322: {
323:
324: if (TEXI_IGN & p->flags)
325: return;
326:
327: putchar(c);
328: if ('\n' == c) {
329: p->outcol = 0;
330: p->outmacro = 0;
331: p->seenws = 0;
332: } else
333: p->outcol++;
334: }
335:
/*
 * Write a NUL-terminated string of free text, one character at a
 * time, through texiputchar().
 */
static void
texiputchars(struct texi *p, const char *s)
{
	const char	*cp;

	for (cp = s; *cp != '\0'; cp++)
		texiputchar(p, *cp);
}
346:
347: /*
348: * Put an mdoc(7) command without the trailing newline.
349: * This should ONLY be used for mdoc(7) commands!
350: */
351: static void
352: texifputs(struct texi *p, const char *s)
353: {
354: int rc;
355:
356: if (TEXI_IGN & p->flags)
357: return;
358: if (p->outcol)
359: texiputchar(p, '\n');
360: if (EOF != (rc = fputs(s, stdout)))
361: p->outcol += rc;
362: }
363:
364: /*
365: * Put an mdoc(7) command with the trailing newline.
366: * This should ONLY be used for mdoc(7) commands!
367: */
368: static void
369: teximacro(struct texi *p, const char *s)
370: {
371:
372: if (TEXI_IGN & p->flags)
373: return;
374: if (p->outcol)
375: texiputchar(p, '\n');
376: puts(s);
377: p->outcol = 0;
378: p->seenws = 0;
379: }
380:
381: /*
382: * Advance by a single byte in the input stream.
383: */
384: static void
385: advance(struct texi *p, const char *buf, size_t *pos)
386: {
387:
388: if ('\n' == buf[*pos]) {
389: p->files[p->filepos - 1].line++;
390: p->files[p->filepos - 1].col = 0;
391: } else
392: p->files[p->filepos - 1].col++;
393:
394: (*pos)++;
395: }
396:
397: /*
398: * Advance to the next non-whitespace word in the input stream.
399: * If we're in literal mode, then print all of the whitespace as we're
400: * doing so.
401: */
402: static size_t
403: advancenext(struct texi *p, const char *buf, size_t sz, size_t *pos)
404: {
405:
406: if (TEXI_LITERAL & p->flags) {
407: while (*pos < sz && isspace(buf[*pos])) {
408: texiputchar(p, buf[*pos]);
409: advance(p, buf, pos);
410: }
411: return(*pos);
412: }
413:
414: while (*pos < sz && isspace(buf[*pos])) {
415: p->seenws = 1;
416: /*
417: * If it looks like we've printed a double-line, then
418: * output a paragraph.
419: * FIXME: this is stupid.
420: */
421: if (*pos && '\n' == buf[*pos] && '\n' == buf[*pos - 1])
422: teximacro(p, ".Pp");
423: advance(p, buf, pos);
424: }
425: return(*pos);
426: }
427:
/*
 * Move forward to the next newline (which is NOT consumed) or to the
 * end of the buffer, whichever comes first.
 * Returns the new position, which is also stored in "*pos".
 */
static size_t
advanceeoln(struct texi *p, const char *buf, size_t sz, size_t *pos)
{

	for ( ; *pos < sz; advance(p, buf, pos))
		if (buf[*pos] == '\n')
			break;

	return(*pos);
}
439:
/*
 * Move forward to the absolute buffer offset "end", which must not
 * lie before the current position.
 * Every byte is stepped over individually so that line and column
 * accounting stays correct.
 */
static void
advanceto(struct texi *p, const char *buf, size_t *pos, size_t end)
{

	assert(end >= *pos);

	while (*pos != end)
		advance(p, buf, pos);
}
452:
453: /*
454: * Output a free-form word in the input stream, progressing to the next
455: * command or white-space.
456: * This also will advance the input stream.
457: */
458: static void
459: texiword(struct texi *p, const char *buf, size_t sz, size_t *pos)
460: {
461:
462: /*
463: * XXX: if we're in literal mode, then we shouldn't do any
464: * reflowing of text here.
465: */
466: if (p->outcol > 72 && ! (TEXI_LITERAL & p->flags))
467: texiputchar(p, '\n');
468:
469: if (p->seenws && p->outcol && ! (TEXI_LITERAL & p->flags))
470: texiputchar(p, ' ');
471:
472: p->seenws = 0;
473:
474: while (*pos < sz && ! isspace(buf[*pos])) {
475: switch (buf[*pos]) {
476: case ('@'):
477: case ('}'):
478: case ('{'):
479: return;
480: }
481: if (*pos < sz - 1 &&
482: '`' == buf[*pos] &&
483: '`' == buf[*pos + 1]) {
484: texiputchars(p, "\\(lq");
485: advance(p, buf, pos);
486: } else if (*pos < sz - 1 &&
487: '\'' == buf[*pos] &&
488: '\'' == buf[*pos + 1]) {
489: texiputchars(p, "\\(rq");
490: advance(p, buf, pos);
491: } else
492: texiputchar(p, buf[*pos]);
493: advance(p, buf, pos);
494: }
495: }
496:
497: static enum texicmd
498: texicmd(struct texi *p, const char *buf,
499: size_t pos, size_t sz, size_t *end)
500: {
501: size_t i, len;
502:
503: assert('@' == buf[pos]);
504: for (*end = ++pos; *end < sz && ! isspace(buf[*end]); (*end)++)
1.2 ! kristaps 505: if ((*end > pos && '@' == buf[*end]) || '{' == buf[*end])
1.1 kristaps 506: break;
507:
508: len = *end - pos;
509: for (i = 0; i < TEXICMD__MAX; i++) {
510: if (len != texitoks[i].len)
511: continue;
512: if (0 == strncmp(texitoks[i].tok, &buf[pos], len))
513: return(i);
514: }
515:
516: texiwarn(p, "bad command: %.*s", (int)len, &buf[pos]);
517: return(TEXICMD__MAX);
518: }
519:
520: static void
521: parseeof(struct texi *p, const char *buf, size_t sz)
522: {
523: size_t pos = 0;
524: enum texicmd cmd;
525: size_t end;
526:
527: while ((pos = advancenext(p, buf, sz, &pos)) < sz) {
528: switch (buf[pos]) {
529: case ('}'):
530: texiwarn(p, "unexpected \"}\"");
531: advance(p, buf, &pos);
532: continue;
533: case ('{'):
534: texiwarn(p, "unexpected \"{\"");
535: advance(p, buf, &pos);
536: continue;
537: case ('@'):
538: break;
539: default:
540: texiword(p, buf, sz, &pos);
541: continue;
542: }
543:
544: cmd = texicmd(p, buf, pos, sz, &end);
545: advanceto(p, buf, &pos, end);
546: if (TEXICMD__MAX == cmd)
547: continue;
548: if (NULL != texitoks[cmd].fp)
549: (*texitoks[cmd].fp)(p, cmd, buf, sz, &pos);
550: }
551: }
552:
553: static void
554: parsebracket(struct texi *p, const char *buf, size_t sz, size_t *pos)
555: {
556: size_t end;
557: enum texicmd cmd;
558:
559: if (*pos == sz || '{' != buf[*pos])
560: return;
561: advance(p, buf, pos);
562:
563: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
564: switch (buf[*pos]) {
565: case ('}'):
566: advance(p, buf, pos);
567: return;
568: case ('{'):
569: texiwarn(p, "unexpected \"{\"");
570: advance(p, buf, pos);
571: continue;
572: case ('@'):
573: break;
574: default:
575: texiword(p, buf, sz, pos);
576: continue;
577: }
578:
579: cmd = texicmd(p, buf, *pos, sz, &end);
580: advanceto(p, buf, pos, end);
581: if (TEXICMD__MAX == cmd)
582: continue;
583: if (NULL != texitoks[cmd].fp)
584: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
585: }
586: }
587:
588: static void
589: parseto(struct texi *p, const char *buf,
590: size_t sz, size_t *pos, const char *endtoken)
591: {
592: size_t end;
593: enum texicmd cmd;
594: size_t endtoksz;
595:
596: endtoksz = strlen(endtoken);
597: assert(endtoksz > 0);
598:
599: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
600: switch (buf[*pos]) {
601: case ('}'):
602: texiwarn(p, "unexpected \"}\"");
603: advance(p, buf, pos);
604: continue;
605: case ('{'):
606: texiwarn(p, "unexpected \"{\"");
607: advance(p, buf, pos);
608: continue;
609: case ('@'):
610: break;
611: default:
612: texiword(p, buf, sz, pos);
613: continue;
614: }
615:
616: cmd = texicmd(p, buf, *pos, sz, &end);
617: advanceto(p, buf, pos, end);
618: if (TEXICMD_END == cmd) {
1.2 ! kristaps 619: while (*pos < sz && isws(buf[*pos]))
1.1 kristaps 620: advance(p, buf, pos);
621: /*
622: * FIXME: skip tabs and also check the full
623: * word, not just its initial substring!
624: */
625: if (sz - *pos >= endtoksz && 0 == strncmp
626: (&buf[*pos], endtoken, endtoksz)) {
627: advanceeoln(p, buf, sz, pos);
628: break;
629: }
630: texiwarn(p, "unexpected \"end\"");
631: advanceeoln(p, buf, sz, pos);
632: continue;
633: } else if (TEXICMD__MAX != cmd)
634: if (NULL != texitoks[cmd].fp)
635: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
636: }
637: }
638:
639: static void
1.2 ! kristaps 640: parsefile(struct texi *p, const char *fname)
! 641: {
! 642: struct texifile *f;
! 643: int fd;
! 644: struct stat st;
! 645:
! 646: assert(p->filepos < 64);
! 647: f = &p->files[p->filepos];
! 648: memset(f, 0, sizeof(struct texifile));
! 649:
! 650: f->name = fname;
! 651: if (-1 == (fd = open(fname, O_RDONLY, 0))) {
! 652: texiabort(p, fname);
! 653: } else if (-1 == fstat(fd, &st)) {
! 654: close(fd);
! 655: texiabort(p, fname);
! 656: }
! 657:
! 658: f->mapsz = st.st_size;
! 659: f->map = mmap(NULL, f->mapsz,
! 660: PROT_READ, MAP_SHARED, fd, 0);
! 661: close(fd);
! 662:
! 663: if (MAP_FAILED == f->map)
! 664: texiabort(p, fname);
! 665:
! 666: p->filepos++;
! 667: parseeof(p, f->map, f->mapsz);
! 668: texifilepop(p);
! 669: }
! 670:
! 671: static void
1.1 kristaps 672: doignblock(struct texi *p, enum texicmd cmd,
673: const char *buf, size_t sz, size_t *pos)
674: {
675: unsigned int sv = p->flags;
676: const char *blockname;
677:
678: p->flags |= TEXI_IGN;
679: switch (cmd) {
680: case (TEXICMD_COPYING):
681: blockname = "copying";
682: break;
683: case (TEXICMD_DETAILMENU):
684: blockname = "detailmenu";
685: break;
686: case (TEXICMD_DIRENTRY):
687: blockname = "direntry";
688: break;
689: case (TEXICMD_IFHTML):
690: blockname = "ifhtml";
691: break;
692: case (TEXICMD_IFTEX):
693: blockname = "iftex";
694: break;
695: case (TEXICMD_MENU):
696: blockname = "menu";
697: break;
698: case (TEXICMD_TEX):
699: blockname = "tex";
700: break;
701: case (TEXICMD_TITLEPAGE):
702: blockname = "titlepage";
703: break;
704: default:
705: abort();
706: }
707: parseto(p, buf, sz, pos, blockname);
708: p->flags = sv;
709: }
710:
711: static void
712: doifnottex(struct texi *p, enum texicmd cmd,
713: const char *buf, size_t sz, size_t *pos)
714: {
715:
716: parseto(p, buf, sz, pos, "ifnottex");
717: }
718:
719: static void
720: doinline(struct texi *p, const char *buf,
721: size_t sz, size_t *pos, const char *macro)
722: {
723:
724: if ( ! p->outmacro)
725: texifputs(p, ".");
726: texiputchars(p, macro);
727: texiputchar(p, ' ');
728: p->seenws = 0;
729: p->outmacro++;
730: parsebracket(p, buf, sz, pos);
731: p->outmacro--;
732: if (*pos < sz - 1 &&
733: ismpunct(buf[*pos]) &&
734: isspace(buf[*pos + 1])) {
735: texiputchar(p, ' ');
736: texiputchar(p, buf[*pos]);
737: advance(p, buf, pos);
738: }
739: if ( ! p->outmacro)
740: texiputchar(p, '\n');
741: }
742:
743: static void
1.2 ! kristaps 744: doinclude(struct texi *p, enum texicmd cmd,
! 745: const char *buf, size_t sz, size_t *pos)
! 746: {
! 747: char fname[PATH_MAX], path[PATH_MAX];
! 748: size_t i;
! 749: int rc;
! 750:
! 751: while (*pos < sz && ' ' == buf[*pos])
! 752: advance(p, buf, pos);
! 753:
! 754: /* Read in the filename. */
! 755: for (i = 0; *pos < sz && '\n' != buf[*pos]; i++) {
! 756: if (i == sizeof(fname) - 1)
! 757: break;
! 758: fname[i] = buf[*pos];
! 759: advance(p, buf, pos);
! 760: }
! 761:
! 762: if (i == 0)
! 763: texierr(p, "path too short");
! 764: else if ('\n' != buf[*pos])
! 765: texierr(p, "path too long");
! 766: else if ('/' == fname[0])
! 767: texierr(p, "no absolute paths");
! 768: fname[i] = '\0';
! 769:
! 770: if (strstr(fname, "../") || strstr(fname, "/.."))
! 771: texierr(p, "insecure path");
! 772:
! 773: /* Append filename to original name's directory. */
! 774: rc = snprintf(path, sizeof(path), "%s/%s", p->dir, fname);
! 775: if (rc < 0)
! 776: texierr(p, "couldn't format filename");
! 777: else if ((size_t)rc >= sizeof(path))
! 778: texierr(p, "path too long");
! 779:
! 780: /* Pump through to parser. */
! 781: parsefile(p, path);
! 782: }
! 783:
! 784: static void
1.1 kristaps 785: doitalic(struct texi *p, enum texicmd cmd,
786: const char *buf, size_t sz, size_t *pos)
787: {
788:
789: texiputchars(p, "\\fI");
790: parsebracket(p, buf, sz, pos);
791: texiputchars(p, "\\fP");
792: }
793:
794: static void
795: doliteral(struct texi *p, enum texicmd cmd,
796: const char *buf, size_t sz, size_t *pos)
797: {
798:
799: if (TEXI_LITERAL & p->flags)
800: parsebracket(p, buf, sz, pos);
801: else
802: doinline(p, buf, sz, pos, "Li");
803: }
804:
805: static void
806: doemph(struct texi *p, enum texicmd cmd,
807: const char *buf, size_t sz, size_t *pos)
808: {
809:
810: if (TEXI_LITERAL & p->flags)
811: doitalic(p, cmd, buf, sz, pos);
812: else
813: doinline(p, buf, sz, pos, "Em");
814: }
815:
816: static void
817: docommand(struct texi *p, enum texicmd cmd,
818: const char *buf, size_t sz, size_t *pos)
819: {
820:
821: doinline(p, buf, sz, pos, "Xr");
822: }
823:
824: static void
825: dobracket(struct texi *p, enum texicmd cmd,
826: const char *buf, size_t sz, size_t *pos)
827: {
828:
829: parsebracket(p, buf, sz, pos);
830: }
831:
832: static void
833: dofile(struct texi *p, enum texicmd cmd,
834: const char *buf, size_t sz, size_t *pos)
835: {
836:
837: if (TEXI_LITERAL & p->flags)
838: parsebracket(p, buf, sz, pos);
839: else
840: doinline(p, buf, sz, pos, "Pa");
841: }
842:
843: static void
844: doexample(struct texi *p, enum texicmd cmd,
845: const char *buf, size_t sz, size_t *pos)
846: {
847: unsigned int sv;
848:
849: teximacro(p, ".Bd -literal");
850: advanceeoln(p, buf, sz, pos);
851: if ('\n' == buf[*pos])
852: advance(p, buf, pos);
853: sv = p->flags;
854: p->flags |= TEXI_LITERAL;
855: parseto(p, buf, sz, pos, "example");
856: p->flags = sv;
857: teximacro(p, ".Ed");
858: }
859:
860: static void
861: dobye(struct texi *p, enum texicmd cmd,
862: const char *buf, size_t sz, size_t *pos)
863: {
864:
865: texiexit(p);
866: exit(EXIT_SUCCESS);
867: }
868:
869: static void
870: dosymbol(struct texi *p, enum texicmd cmd,
871: const char *buf, size_t sz, size_t *pos)
872: {
873:
874: switch (cmd) {
875: case (TEXICMD_AT):
876: texiputchars(p, "@");
877: break;
878: case (TEXICMD_COPYRIGHT):
879: texiputchars(p, "\\(co");
880: break;
1.2 ! kristaps 881: case (TEXICMD_DOTS):
! 882: texiputchars(p, "...");
! 883: break;
1.1 kristaps 884: case (TEXICMD_LATEX):
885: texiputchars(p, "LaTeX");
886: break;
887: case (TEXICMD_TEXSYM):
888: texiputchars(p, "TeX");
889: break;
890: default:
891: abort();
892: }
893:
894: doignbracket(p, cmd, buf, sz, pos);
895: }
896:
897: static void
898: doquotation(struct texi *p, enum texicmd cmd,
899: const char *buf, size_t sz, size_t *pos)
900: {
901:
902: teximacro(p, ".Qo");
903: parseto(p, buf, sz, pos, "quotation");
904: teximacro(p, ".Qc");
905: }
906:
907: static void
908: doarg1(struct texi *p, enum texicmd cmd,
909: const char *buf, size_t sz, size_t *pos)
910: {
911:
912: if (*pos == sz || '{' != buf[*pos])
913: return;
914: advance(p, buf, pos);
915: if ( ! p->outmacro)
916: texifputs(p, ".");
917: switch (cmd) {
918: case (TEXICMD_EMAIL):
919: texiputchars(p, "Lk ");
920: break;
921: case (TEXICMD_URL):
922: texiputchars(p, "Mt ");
923: break;
924: default:
925: abort();
926: }
927: while (*pos < sz && '}' != buf[*pos] && ',' != buf[*pos]) {
928: texiputchar(p, buf[*pos]);
929: advance(p, buf, pos);
930: }
931: while (*pos < sz && '}' != buf[*pos])
932: advance(p, buf, pos);
933: if (*pos < sz)
934: advance(p, buf, pos);
935: if (*pos < sz - 1 &&
936: ismpunct(buf[*pos]) &&
937: isspace(buf[*pos + 1])) {
938: texiputchar(p, ' ');
939: texiputchar(p, buf[*pos]);
940: advance(p, buf, pos);
941: }
942: if ( ! p->outmacro)
943: texiputchar(p, '\n');
944: }
945:
946: static void
947: dosubsection(struct texi *p, enum texicmd cmd,
948: const char *buf, size_t sz, size_t *pos)
949: {
950:
951: if (TEXI_IGN & p->flags) {
952: advanceeoln(p, buf, sz, pos);
953: return;
954: }
955: while (*pos < sz && ' ' == buf[*pos])
956: advance(p, buf, pos);
957: texifputs(p, ".Pp");
958: while (*pos < sz && '\n' != buf[*pos]) {
959: texiputchar(p, buf[*pos]);
960: advance(p, buf, pos);
961: }
962: texifputs(p, ".Pp");
963: }
964:
965: static void
966: dosection(struct texi *p, enum texicmd cmd,
967: const char *buf, size_t sz, size_t *pos)
968: {
969:
970: if (TEXI_IGN & p->flags) {
971: advanceeoln(p, buf, sz, pos);
972: return;
973: }
1.2 ! kristaps 974: while (*pos < sz && isws(buf[*pos]) )
1.1 kristaps 975: advance(p, buf, pos);
976: texifputs(p, ".Ss ");
977: while (*pos < sz && '\n' != buf[*pos]) {
978: texiputchar(p, buf[*pos]);
979: advance(p, buf, pos);
980: }
981: texiputchar(p, '\n');
982: }
983:
984: static void
985: dosh(struct texi *p, enum texicmd cmd,
986: const char *buf, size_t sz, size_t *pos)
987: {
988:
989: if (TEXI_IGN & p->flags) {
990: advanceeoln(p, buf, sz, pos);
991: return;
992: }
1.2 ! kristaps 993: while (*pos < sz && isws(buf[*pos]))
1.1 kristaps 994: advance(p, buf, pos);
995: texifputs(p, ".Sh ");
996: while (*pos < sz && '\n' != buf[*pos]) {
997: texiputchar(p, toupper(buf[*pos]));
998: advance(p, buf, pos);
999: }
1000: texiputchar(p, '\n');
1001: }
1002:
1003: static void
1004: dotop(struct texi *p, enum texicmd cmd,
1005: const char *buf, size_t sz, size_t *pos)
1006: {
1007:
1008: p->flags &= ~TEXI_HEADER;
1009: advanceeoln(p, buf, sz, pos);
1.2 ! kristaps 1010: teximacro(p, ".Dd $Mdocdate: February 16 2015 $");
1.1 kristaps 1011: teximacro(p, ".Dt SOMETHING 7");
1012: teximacro(p, ".Os");
1013: teximacro(p, ".Sh NAME");
1014: teximacro(p, ".Nm Something");
1015: teximacro(p, ".Nd Something");
1016: }
1017:
1018: static void
1019: doitem(struct texi *p, enum texicmd cmd,
1020: const char *buf, size_t sz, size_t *pos)
1021: {
1022: size_t end;
1023:
1024: /* See if we have arguments... */
1025: for (end = *pos; end < sz; end++)
1.2 ! kristaps 1026: if ( ! isws(buf[end]))
1.1 kristaps 1027: break;
1028:
1029: /* If we have arguments, print them too. */
1030: if ('\n' != buf[end]) {
1031: texifputs(p, ".It");
1032: /* FIXME: process commands. */
1033: while (*pos < sz && '\n' != buf[*pos]) {
1034: texiputchar(p, buf[*pos]);
1035: advance(p, buf, pos);
1036: }
1037: texiputchar(p, '\n');
1038: } else
1039: teximacro(p, ".It");
1040: }
1041:
1042: static void
1043: dotable(struct texi *p, enum texicmd cmd,
1044: const char *buf, size_t sz, size_t *pos)
1045: {
1046:
1047: teximacro(p, ".Bl -tag -width Ds");
1048: parseto(p, buf, sz, pos, "table");
1049: teximacro(p, ".El");
1050: }
1051:
1052: static void
1.2 ! kristaps 1053: doenumerate(struct texi *p, enum texicmd cmd,
! 1054: const char *buf, size_t sz, size_t *pos)
! 1055: {
! 1056:
! 1057: teximacro(p, ".Bl -enum");
! 1058: parseto(p, buf, sz, pos, "enumerate");
! 1059: teximacro(p, ".El");
! 1060: }
! 1061:
! 1062: static void
1.1 kristaps 1063: doitemize(struct texi *p, enum texicmd cmd,
1064: const char *buf, size_t sz, size_t *pos)
1065: {
1066:
1067: teximacro(p, ".Bl -bullet");
1068: parseto(p, buf, sz, pos, "itemize");
1069: teximacro(p, ".El");
1070: }
1071:
1072: static void
1073: doignbracket(struct texi *p, enum texicmd cmd,
1074: const char *buf, size_t sz, size_t *pos)
1075: {
1076: unsigned int sv = p->flags;
1077:
1078: p->flags |= TEXI_IGN;
1079: parsebracket(p, buf, sz, pos);
1080: p->flags = sv;
1081: }
1082:
1083: static void
1084: doignline(struct texi *p, enum texicmd cmd,
1085: const char *buf, size_t sz, size_t *pos)
1086: {
1087:
1088: advanceeoln(p, buf, sz, pos);
1089: if (*pos < sz)
1090: advance(p, buf, pos);
1091: }
1092:
1093: int
1094: main(int argc, char *argv[])
1095: {
1096: struct texi texi;
1.2 ! kristaps 1097: int c;
! 1098: char *path, *dir;
1.1 kristaps 1099: const char *progname;
1100:
1101: progname = strrchr(argv[0], '/');
1102: if (progname == NULL)
1103: progname = argv[0];
1104: else
1105: ++progname;
1106:
1107: while (-1 != (c = getopt(argc, argv, "")))
1108: switch (c) {
1109: default:
1110: goto usage;
1111: }
1112:
1113: argv += optind;
1114: if (0 == (argc -= optind))
1115: goto usage;
1116:
1.2 ! kristaps 1117: if (NULL == (path = strdup(argv[0]))) {
! 1118: perror(NULL);
! 1119: exit(EXIT_FAILURE);
! 1120: } else if (NULL == (dir = dirname(path))) {
! 1121: perror(argv[0]);
! 1122: free(path);
! 1123: exit(EXIT_FAILURE);
! 1124: }
! 1125: free(path);
! 1126:
1.1 kristaps 1127: memset(&texi, 0, sizeof(struct texi));
1128: texi.flags = TEXI_HEADER;
1.2 ! kristaps 1129: texi.dir = strdup(dir);
! 1130: parsefile(&texi, argv[0]);
! 1131: texiexit(&texi);
! 1132: return(EXIT_FAILURE);
1.1 kristaps 1133: usage:
1134: fprintf(stderr, "usage: %s file\n", progname);
1135: return(EXIT_FAILURE);
1136: }
CVSweb