Annotation of texi2mdoc/main.c, Revision 1.8
1.8 ! kristaps 1: /* $Id: main.c,v 1.7 2015/02/18 11:08:58 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2015 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/mman.h>
18: #include <sys/stat.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <getopt.h>
1.2 kristaps 24: #include <libgen.h>
25: #include <limits.h>
1.1 kristaps 26: #include <stdarg.h>
27: #include <stdio.h>
28: #include <stdlib.h>
29: #include <string.h>
1.6 kristaps 30: #include <unistd.h>
1.1 kristaps 31:
/*
 * Every native Texinfo command that this utility recognises.
 * The enumerators are used as indices into the texitoks table, so both
 * must list the commands in the same order.
 * TEXICMD__MAX is the number of commands; the command lookup also
 * returns it to mean "no such command".
 * Overridden (non-native) names are not represented here.
 */
enum texicmd {
	TEXICMD_ACRONYM = 0,		/* @acronym */
	TEXICMD_A4PAPER,		/* @afourpaper */
	TEXICMD_ANCHOR,			/* @anchor */
	TEXICMD_APPENDIX,		/* @appendix */
	TEXICMD_APPENDIXSEC,		/* @appendixsec */
	TEXICMD_ASTERISK,		/* "@" followed by an asterisk */
	TEXICMD_AT,			/* @@ */
	TEXICMD_AUTHOR,			/* @author */
	TEXICMD_BANG,			/* @! */
	TEXICMD_BULLET,			/* @bullet */
	TEXICMD_BYE,			/* @bye */
	TEXICMD_CENTER,			/* @center */
	TEXICMD_CHAPTER,		/* @chapter */
	TEXICMD_CINDEX,			/* @cindex */
	TEXICMD_CITE,			/* @cite */
	TEXICMD_CODE,			/* @code */
	TEXICMD_COLON,			/* @: */
	TEXICMD_COMMAND,		/* @command */
	TEXICMD_COMMENT,		/* @c */
	TEXICMD_COMMENT_LONG,		/* @comment */
	TEXICMD_CONTENTS,		/* @contents */
	TEXICMD_COPYING,		/* @copying */
	TEXICMD_COPYRIGHT,		/* @copyright */
	TEXICMD_DEFTYPEFN,		/* @deftypefn */
	TEXICMD_DEFTYPEFNX,		/* @deftypefnx */
	TEXICMD_DEFTYPEFUN,		/* @deftypefun */
	TEXICMD_DEFTYPEFUNX,		/* @deftypefunx */
	TEXICMD_DEFTYPEVAR,		/* @deftypevar */
	TEXICMD_DEFTYPEVR,		/* @deftypevr */
	TEXICMD_DETAILMENU,		/* @detailmenu */
	TEXICMD_DFN,			/* @dfn */
	TEXICMD_DIRCATEGORY,		/* @dircategory */
	TEXICMD_DIRENTRY,		/* @direntry */
	TEXICMD_DISPLAY,		/* @display */
	TEXICMD_DOTS,			/* @dots */
	TEXICMD_EMAIL,			/* @email */
	TEXICMD_EMPH,			/* @emph */
	TEXICMD_END,			/* @end */
	TEXICMD_ENUMERATE,		/* @enumerate */
	TEXICMD_ENV,			/* @env */
	TEXICMD_EXAMPLE,		/* @example */
	TEXICMD_FILE,			/* @file */
	TEXICMD_GROUP,			/* @group */
	TEXICMD_HEADING,		/* @heading */
	TEXICMD_HEADINGS,		/* @headings */
	TEXICMD_HYPHEN,			/* @- */
	TEXICMD_I,			/* @i */
	TEXICMD_IFCLEAR,		/* @ifclear */
	TEXICMD_IFHTML,			/* @ifhtml */
	TEXICMD_IFINFO,			/* @ifinfo */
	TEXICMD_IFNOTTEX,		/* @ifnottex */
	TEXICMD_IFTEX,			/* @iftex */
	TEXICMD_IFSET,			/* @ifset */
	TEXICMD_IMAGE,			/* @image */
	TEXICMD_INCLUDE,		/* @include */
	TEXICMD_INSERTCOPYING,		/* @insertcopying */
	TEXICMD_ITEM,			/* @item */
	TEXICMD_ITEMIZE,		/* @itemize */
	TEXICMD_KBD,			/* @kbd */
	TEXICMD_LATEX,			/* @LaTeX */
	TEXICMD_MATH,			/* @math */
	TEXICMD_MENU,			/* @menu */
	TEXICMD_NEWLINE,		/* "@" followed by a newline */
	TEXICMD_NODE,			/* @node */
	TEXICMD_NOINDENT,		/* @noindent */
	TEXICMD_PXREF,			/* @pxref */
	TEXICMD_QUESTIONMARK,		/* @? */
	TEXICMD_QUOTATION,		/* @quotation */
	TEXICMD_PAGE,			/* @page */
	TEXICMD_PARINDENT,		/* @paragraphindent */
	TEXICMD_PRINTINDEX,		/* @printindex */
	TEXICMD_REF,			/* @ref */
	TEXICMD_SAMP,			/* @samp */
	TEXICMD_SC,			/* @sc */
	TEXICMD_SECTION,		/* @section */
	TEXICMD_SET,			/* @set */
	TEXICMD_SETCHAPNEWPAGE,		/* @setchapternewpage */
	TEXICMD_SETFILENAME,		/* @setfilename */
	TEXICMD_SETTITLE,		/* @settitle */
	TEXICMD_SP,			/* @sp */
	TEXICMD_SPACE,			/* "@" followed by a space */
	TEXICMD_SMALLEXAMPLE,		/* @smallexample */
	TEXICMD_SQUIGGLE_LEFT,		/* @{ */
	TEXICMD_SQUIGGLE_RIGHT,		/* @} */
	TEXICMD_STRONG,			/* @strong */
	TEXICMD_SUBSECTION,		/* @subsection */
	TEXICMD_SUBTITLE,		/* @subtitle */
	TEXICMD_TAB,			/* "@" followed by a tab */
	TEXICMD_TABLE,			/* @table */
	TEXICMD_TEX,			/* @tex */
	TEXICMD_TEXSYM,			/* @TeX */
	TEXICMD_TITLE,			/* @title */
	TEXICMD_TITLEFONT,		/* @titlefont */
	TEXICMD_TITLEPAGE,		/* @titlepage */
	TEXICMD_TOP,			/* @top */
	TEXICMD_UNNUMBERED,		/* @unnumbered */
	TEXICMD_UNNUMBEREDSEC,		/* @unnumberedsec */
	TEXICMD_UREF,			/* @uref */
	TEXICMD_URL,			/* @url */
	TEXICMD_VAR,			/* @var */
	TEXICMD_W,			/* @w */
	TEXICMD_XREF,			/* @xref */
	TEXICMD__MAX			/* number of commands; not a command */
};
142:
/*
 * One input file being parsed, together with the parser's current
 * location inside it.
 */
struct texifile {
	const char	*name;	/* name of the file */
	size_t		 line;	/* current line, counted from zero */
	size_t		 col;	/* current column in the line, from zero */
	char		*map;	/* the file's contents as mapped by mmap(2) */
	size_t		 mapsz;	/* size of the mapping in bytes */
};
154:
struct texi;

/*
 * Callback type for the functions implementing Texinfo commands.
 * A handler is invoked with the parse state, the command that was
 * matched, the input buffer and its size, and a pointer to the current
 * position in that buffer (already advanced past the command name).
 */
typedef void (*texicmdfp)(struct texi *p, enum texicmd cmd,
	const char *buf, size_t sz, size_t *pos);
162:
163: /*
164: * Describes Texinfo commands, whether native or overriden.
165: */
166: struct texitok {
167: texicmdfp fp; /* callback (or NULL if none) */
168: const char *tok; /* name of the token */
169: size_t len; /* strlen(tok) */
170: };
171:
/*
 * Kind of list currently being processed (stored in struct texi).
 * NOTE(review): ITEM vs. NOITEM presumably distinguishes lists whose
 * items carry a label from those that do not -- confirm against the
 * list handlers.
 */
enum texilist {
	TEXILIST_NONE = 0,	/* not inside any list */
	TEXILIST_ITEM = 1,
	TEXILIST_NOITEM = 2
};
177:
1.1 kristaps 178: /*
179: * The main parse structure.
180: * This keeps any necessary information handy.
181: */
182: struct texi {
1.5 kristaps 183: struct texifile files[64]; /* stack of open files */
184: size_t filepos; /* number of open files */
185: size_t outcol; /* column in output line */
186: char **dirs; /* texi directories */
187: size_t dirsz; /* number of texi directories */
1.8 ! kristaps 188: enum texilist list; /* current list (set recursively) */
! 189: int outmacro; /* if >0, output is in line macro */
! 190: int seenws; /* ws has been seen (and ignored) */
! 191: int ign; /* if >0, don't print anything */
! 192: int literal; /* if >0, literal context */
1.1 kristaps 193: };
194:
/*
 * Non-zero if the character is a period, comma or semicolon.
 * The argument is evaluated more than once.
 * FIXME: find a better way.
 */
#define	ismpunct(_x) \
	((_x) == '.' || (_x) == ',' || (_x) == ';')
/*
 * Non-zero for the blanks that Texinfo disregards: space and tab.
 * Unlike isspace(3), a newline does not count.
 * The argument is evaluated more than once.
 */
#define	isws(_x) \
	((_x) == ' ' || (_x) == '\t')
1.1 kristaps 203:
/*
 * Command handlers referenced by the texitoks table.
 * All of them share the texicmdfp signature.
 */
static void	doblock(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dobracket(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dobye(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dochapter(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	docommand(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dodeftypefun(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dodeftypevar(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dodisplay(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doemph(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doenumerate(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doenv(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doexample(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dofile(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doignargn(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doignblock(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doignbracket(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doignline(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doinclude(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doitalic(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doitem(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doitemize(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dolink(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doliteral(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	domath(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doquotation(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dotable(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dotop(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dosection(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dosp(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dosubsection(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dosymbol(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
235:
236: static const struct texitok texitoks[TEXICMD__MAX] = {
1.8 ! kristaps 237: { doignargn, "acronym", 7 }, /* TEXICMD_ACRONYM */
1.1 kristaps 238: { doignline, "afourpaper", 10 }, /* TEXICMD_A4PAPER */
239: { doignbracket, "anchor", 6 }, /* TEXICMD_ANCHOR */
1.3 kristaps 240: { dochapter, "appendix", 8 }, /* TEXICMD_APPENDIX */
241: { dochapter, "appendixsec", 11 }, /* TEXICMD_APPENDIXSEC */
242: { dosymbol, "*", 1 }, /* TEXICMD_ASTERISK */
1.1 kristaps 243: { dosymbol, "@", 1 }, /* TEXICMD_AT */
1.3 kristaps 244: { doignline, "author", 6 }, /* TEXICMD_AUTHOR */
245: { dosymbol, "!", 1 }, /* TEXICMD_BANG */
1.7 kristaps 246: { dosymbol, "bullet", 6 }, /* TEXICMD_BULLET */
1.1 kristaps 247: { dobye, "bye", 3 }, /* TEXICMD_BYE */
1.5 kristaps 248: { doignline, "center", 5 }, /* TEXICMD_CENTER */
1.3 kristaps 249: { dochapter, "chapter", 7 }, /* TEXICMD_CHAPTER */
1.1 kristaps 250: { doignline, "cindex", 6 }, /* TEXICMD_CINDEX */
251: { doliteral, "code", 4 }, /* TEXICMD_CODE */
1.3 kristaps 252: { doitalic, "cite", 4 }, /* TEXICMD_CITE */
253: { dosymbol, ":", 1 }, /* TEXICMD_COLON */
1.1 kristaps 254: { docommand, "command", 7 }, /* TEXICMD_COMMAND */
255: { doignline, "c", 1 }, /* TEXICMD_COMMENT */
1.2 kristaps 256: { doignline, "comment", 7 }, /* TEXICMD_COMMENT_LONG */
1.1 kristaps 257: { doignline, "contents", 8 }, /* TEXICMD_CONTENTS */
258: { doignblock, "copying", 7 }, /* TEXICMD_COPYING */
259: { dosymbol, "copyright", 9 }, /* TEXICMD_COPYRIGHT */
1.3 kristaps 260: { dodeftypefun, "deftypefn", 9 }, /* TEXICMD_DEFTYPEFN */
261: { dodeftypefun, "deftypefnx", 10 }, /* TEXICMD_DEFTYPEFNX */
262: { dodeftypefun, "deftypefun", 10 }, /* TEXICMD_DEFTYPEFUN */
263: { dodeftypefun, "deftypefunx", 11 }, /* TEXICMD_DEFTYPEFUNX */
264: { dodeftypevar, "deftypevar", 10 }, /* TEXICMD_DEFTYPEVAR */
265: { dodeftypevar, "deftypevr", 9 }, /* TEXICMD_DEFTYPEVR */
1.1 kristaps 266: { doignblock, "detailmenu", 10 }, /* TEXICMD_DETAILMENU */
1.3 kristaps 267: { doitalic, "dfn", 3 }, /* TEXICMD_DFN */
1.1 kristaps 268: { doignline, "dircategory", 11 }, /* TEXICMD_DIRCATEGORY */
269: { doignblock, "direntry", 8 }, /* TEXICMD_DIRENTRY */
1.3 kristaps 270: { dodisplay, "display", 7 }, /* TEXICMD_DISPLAY */
1.2 kristaps 271: { dosymbol, "dots", 4 }, /* TEXICMD_DOTS */
1.8 ! kristaps 272: { dolink, "email", 5 }, /* TEXICMD_EMAIL */
1.1 kristaps 273: { doemph, "emph", 4 }, /* TEXICMD_EMPH */
274: { NULL, "end", 3 }, /* TEXICMD_END */
1.2 kristaps 275: { doenumerate, "enumerate", 9 }, /* TEXICMD_ENUMERATE */
1.3 kristaps 276: { doenv, "env", 3 }, /* TEXICMD_ENV */
1.1 kristaps 277: { doexample, "example", 7 }, /* TEXICMD_EXAMPLE */
278: { dofile, "file", 4 }, /* TEXICMD_FILE */
1.3 kristaps 279: { doblock, "group", 5 }, /* TEXICMD_GROUP */
1.2 kristaps 280: { dosection, "heading", 7 }, /* TEXICMD_HEADING */
1.3 kristaps 281: { doignline, "headings", 8 }, /* TEXICMD_HEADINGS */
282: { dosymbol, "-", 1 }, /* TEXICMD_HYPHEN */
1.1 kristaps 283: { doitalic, "i", 1 }, /* TEXICMD_I */
1.3 kristaps 284: { doignblock, "ifclear", 7 }, /* TEXICMD_IFCLEAR */
1.1 kristaps 285: { doignblock, "ifhtml", 6 }, /* TEXICMD_IFHTML */
1.3 kristaps 286: { doignblock, "ifinfo", 6 }, /* TEXICMD_IFINFO */
287: { doblock, "ifnottex", 8 }, /* TEXICMD_IFNOTTEX */
1.1 kristaps 288: { doignblock, "iftex", 5 }, /* TEXICMD_IFTEX */
1.3 kristaps 289: { doignblock, "ifset", 5 }, /* TEXICMD_IFSET */
1.1 kristaps 290: { doignbracket, "image", 5 }, /* TEXICMD_IMAGE */
1.2 kristaps 291: { doinclude, "include", 7 }, /* TEXICMD_INCLUDE */
1.5 kristaps 292: { doignline, "insertcopying", 13 }, /* TEXICMD_INSERTCOPYING */
1.1 kristaps 293: { doitem, "item", 4 }, /* TEXICMD_ITEM */
294: { doitemize, "itemize", 7 }, /* TEXICMD_ITEMIZE */
295: { doliteral, "kbd", 3 }, /* TEXICMD_KBD */
296: { dosymbol, "LaTeX", 5 }, /* TEXICMD_LATEX */
1.3 kristaps 297: { domath, "math", 4 }, /* TEXICMD_MATH */
1.1 kristaps 298: { doignblock, "menu", 4 }, /* TEXICMD_MENU */
1.3 kristaps 299: { dosymbol, "\n", 1 }, /* TEXICMD_NEWLINE */
1.1 kristaps 300: { doignline, "node", 4 }, /* TEXICMD_NODE */
1.3 kristaps 301: { doignline, "noindent", 8 }, /* TEXICMD_NOINDENT */
1.8 ! kristaps 302: { dolink, "pxref", 5 }, /* TEXICMD_PXREF */
1.3 kristaps 303: { dosymbol, "?", 1 }, /* TEXICMD_QUESTIONMARK */
1.1 kristaps 304: { doquotation, "quotation", 9 }, /* TEXICMD_QUOTATION */
1.3 kristaps 305: { doignline, "page", 4 }, /* TEXICMD_PAGE */
306: { doignline, "paragraphindent", 14 }, /* TEXICMD_PARINDENT */
1.2 kristaps 307: { doignline, "printindex", 10 }, /* TEXICMD_PRINTINDEX */
1.1 kristaps 308: { dobracket, "ref", 3 }, /* TEXICMD_REF */
309: { doliteral, "samp", 4 }, /* TEXICMD_SAMP */
1.7 kristaps 310: { dobracket, "sc", 2 }, /* TEXICMD_SC */
1.1 kristaps 311: { dosection, "section", 7 }, /* TEXICMD_SECTION */
1.3 kristaps 312: { doignline, "set", 3 }, /* TEXICMD_SET */
1.1 kristaps 313: { doignline, "setchapternewpage", 17 }, /* TEXICMD_SETCHAPNEWPAGE */
314: { doignline, "setfilename", 11 }, /* TEXICMD_SETFILENAME */
1.5 kristaps 315: { doignline, "settitle", 8 }, /* TEXICMD_SETTITLE */
1.3 kristaps 316: { dosp, "sp", 2 }, /* TEXICMD_SP */
317: { dosymbol, " ", 1 }, /* TEXICMD_SPACE */
318: { doexample, "smallexample", 12 }, /* TEXICMD_SMALLEXAMPLE */
319: { dosymbol, "{", 1 }, /* TEXICMD_SQUIGGLE_LEFT */
320: { dosymbol, "}", 1 }, /* TEXICMD_SQUIGGLE_RIGHT */
1.8 ! kristaps 321: { doemph, "strong", 6 }, /* TEXICMD_STRONG */
1.1 kristaps 322: { dosubsection, "subsection", 10 }, /* TEXICMD_SUBSECTION */
1.3 kristaps 323: { doignline, "subtitle", 8 }, /* TEXICMD_SUBTITLE */
324: { dosymbol, "\t", 1 }, /* TEXICMD_TAB */
1.1 kristaps 325: { dotable, "table", 5 }, /* TEXICMD_TABLE */
326: { doignblock, "tex", 3 }, /* TEXICMD_TEX */
327: { dosymbol, "TeX", 3 }, /* TEXICMD_TEXSYM */
1.3 kristaps 328: { doignline, "title", 5 }, /* TEXICMD_TITLE */
1.1 kristaps 329: { dobracket, "titlefont", 9 }, /* TEXICMD_TITLEFONT */
330: { doignblock, "titlepage", 9 }, /* TEXICMD_TITLEPAGE */
331: { dotop, "top", 3 }, /* TEXICMD_TOP */
1.3 kristaps 332: { dochapter, "unnumbered", 10 }, /* TEXICMD_UNNUMBERED */
1.2 kristaps 333: { dosection, "unnumberedsec", 13 }, /* TEXICMD_UNNUMBEREDSEC */
1.8 ! kristaps 334: { dolink, "uref", 4 }, /* TEXICMD_UREF */
! 335: { dolink, "url", 3 }, /* TEXICMD_URL */
1.1 kristaps 336: { doliteral, "var", 3 }, /* TEXICMD_VAR */
1.3 kristaps 337: { dobracket, "w", 1 }, /* TEXICMD_W */
1.8 ! kristaps 338: { dolink, "xref", 4 }, /* TEXICMD_XREF */
1.1 kristaps 339: };
340:
1.2 kristaps 341: /*
342: * Unmap the top-most file that we're using.
343: */
1.1 kristaps 344: static void
345: texifilepop(struct texi *p)
346: {
347: struct texifile *f;
348:
349: assert(p->filepos > 0);
350: f = &p->files[--p->filepos];
351: munmap(f->map, f->mapsz);
352: }
353:
1.2 kristaps 354: /*
1.8 ! kristaps 355: * Unmap all files that we're currently using and free all resources
! 356: * that we've allocated during the parse.
1.2 kristaps 357: * The utility should exit(...) after this is called.
358: */
1.1 kristaps 359: static void
360: texiexit(struct texi *p)
361: {
1.5 kristaps 362: size_t i;
363:
364: if (p->outcol)
365: putchar('\n');
1.1 kristaps 366:
367: while (p->filepos > 0)
368: texifilepop(p);
1.5 kristaps 369:
370: for (i = 0; i < p->dirsz; i++)
371: free(p->dirs[i]);
372: free(p->dirs);
1.1 kristaps 373: }
374:
/*
 * Fatal system error: report it, release all resources and exit with
 * a failure status.
 * "errstring" is handed to perror(3), so errno must still describe the
 * failure when this is called.
 */
static void
texiabort(struct texi *p, const char *errstring)
{

	/* Report first: the cleanup below may well overwrite errno. */
	perror(errstring);

	texiexit(p);
	exit(EXIT_FAILURE);
}
387:
388: /*
389: * Print a generic warning message (to stderr) tied to our current
390: * location in the parse sequence.
391: */
392: static void
393: texiwarn(const struct texi *p, const char *fmt, ...)
394: {
395: va_list ap;
396:
1.2 kristaps 397: fprintf(stderr, "%s:%zu:%zu: warning: ",
1.1 kristaps 398: p->files[p->filepos - 1].name,
399: p->files[p->filepos - 1].line + 1,
400: p->files[p->filepos - 1].col + 1);
401: va_start(ap, fmt);
402: vfprintf(stderr, fmt, ap);
403: va_end(ap);
404: fputc('\n', stderr);
405: }
406:
1.8 ! kristaps 407: /*
! 408: * Print an error message (to stderr) tied to our current location in
! 409: * the parse sequence, invoke texiexit(), then die.
! 410: */
1.2 kristaps 411: static void
412: texierr(struct texi *p, const char *fmt, ...)
413: {
414: va_list ap;
415:
416: fprintf(stderr, "%s:%zu:%zu: error: ",
417: p->files[p->filepos - 1].name,
418: p->files[p->filepos - 1].line + 1,
419: p->files[p->filepos - 1].col + 1);
420: va_start(ap, fmt);
421: vfprintf(stderr, fmt, ap);
422: va_end(ap);
423: fputc('\n', stderr);
424: texiexit(p);
425: exit(EXIT_FAILURE);
426: }
427:
1.1 kristaps 428: /*
1.8 ! kristaps 429: * Put a single data character to the output if we're not ignoring.
! 430: * Adjusts our output status.
1.1 kristaps 431: */
432: static void
433: texiputchar(struct texi *p, char c)
434: {
435:
1.3 kristaps 436: if (p->ign)
1.1 kristaps 437: return;
438: putchar(c);
439: if ('\n' == c) {
440: p->outcol = 0;
441: p->seenws = 0;
442: } else
443: p->outcol++;
444: }
445:
/*
 * Write a NUL-terminated string one character at a time through
 * texiputchar(), so that the same ignore and column rules apply.
 */
static void
texiputchars(struct texi *p, const char *s)
{
	const char	*cp;

	for (cp = s; *cp != '\0'; cp++)
		texiputchar(p, *cp);
}
456:
457: /*
1.8 ! kristaps 458: * Close an mdoc(7) macro opened with teximacroopen().
! 459: * If there are no more macros on the line, prints a newline.
1.1 kristaps 460: */
461: static void
1.3 kristaps 462: teximacroclose(struct texi *p)
463: {
464:
1.8 ! kristaps 465: /* FIXME: punctuation. */
1.5 kristaps 466: if (0 == --p->outmacro)
467: texiputchar(p, '\n');
1.3 kristaps 468: }
469:
470: /*
1.8 ! kristaps 471: * Open a mdoc(7) macro.
! 472: * This is used for line macros, e.g., Qq [foo bar baz].
! 473: * It can be invoked for nested macros, e.g., Qq Li foo .
1.3 kristaps 474: */
475: static void
476: teximacroopen(struct texi *p, const char *s)
1.1 kristaps 477: {
478:
1.5 kristaps 479: if (p->outcol && 0 == p->outmacro)
480: texiputchar(p, '\n');
481: if (0 == p->outmacro)
482: texiputchar(p, '.');
483: else
484: texiputchar(p, ' ');
485: texiputchars(p, s);
486: texiputchar(p, ' ');
1.3 kristaps 487: p->outmacro++;
1.5 kristaps 488: p->seenws = 0;
1.1 kristaps 489: }
490:
491: /*
1.8 ! kristaps 492: * Put a stadnalone mdoc(7) command with the trailing newline.
1.1 kristaps 493: */
494: static void
495: teximacro(struct texi *p, const char *s)
496: {
497:
1.4 kristaps 498: if (p->outmacro)
499: texierr(p, "\"%s\" in open line scope!?", s);
500: else if (p->literal)
501: texierr(p, "\"%s\" in a literal scope!?", s);
502:
1.1 kristaps 503: if (p->outcol)
504: texiputchar(p, '\n');
1.5 kristaps 505:
506: texiputchar(p, '.');
507: texiputchars(p, s);
508: texiputchar(p, '\n');
1.1 kristaps 509: }
510:
511: /*
512: * Advance by a single byte in the input stream.
513: */
514: static void
515: advance(struct texi *p, const char *buf, size_t *pos)
516: {
517:
518: if ('\n' == buf[*pos]) {
519: p->files[p->filepos - 1].line++;
520: p->files[p->filepos - 1].col = 0;
521: } else
522: p->files[p->filepos - 1].col++;
523:
524: (*pos)++;
525: }
526:
527: /*
528: * Advance to the next non-whitespace word in the input stream.
529: * If we're in literal mode, then print all of the whitespace as we're
530: * doing so.
531: */
532: static size_t
533: advancenext(struct texi *p, const char *buf, size_t sz, size_t *pos)
534: {
535:
1.3 kristaps 536: if (p->literal) {
1.1 kristaps 537: while (*pos < sz && isspace(buf[*pos])) {
1.5 kristaps 538: if (*pos && '\n' == buf[*pos] &&
539: '\\' == buf[*pos - 1])
540: texiputchar(p, 'e');
1.1 kristaps 541: texiputchar(p, buf[*pos]);
542: advance(p, buf, pos);
543: }
544: return(*pos);
545: }
546:
547: while (*pos < sz && isspace(buf[*pos])) {
548: p->seenws = 1;
549: /*
550: * If it looks like we've printed a double-line, then
551: * output a paragraph.
552: * FIXME: this is stupid.
553: */
1.5 kristaps 554: if (*pos && '\n' == buf[*pos] &&
555: '\n' == buf[*pos - 1])
556: teximacro(p, "Pp");
1.1 kristaps 557: advance(p, buf, pos);
558: }
559: return(*pos);
560: }
561:
/*
 * Advance to the end of the current input line and return the new
 * position.
 * If "consumenl" is non-zero, the terminating newline (if there is
 * one before the end of input) is stepped over as well.
 */
static size_t
advanceeoln(struct texi *p, const char *buf,
	size_t sz, size_t *pos, int consumenl)
{

	/* FIXME: disregards @NEWLINE. */
	for ( ; *pos < sz; advance(p, buf, pos))
		if (buf[*pos] == '\n')
			break;

	/* If we're not at the end of input, we're at the newline. */
	if (consumenl && *pos < sz)
		advance(p, buf, pos);

	return(*pos);
}
577:
/*
 * Advance byte by byte (so that line and column stay correct) up to
 * "end", an absolute position in the current buffer that must not lie
 * before the current position.
 */
static void
advanceto(struct texi *p, const char *buf, size_t *pos, size_t end)
{

	assert(*pos <= end);
	for ( ; *pos < end; )
		advance(p, buf, pos);
}
590:
591: /*
592: * Output a free-form word in the input stream, progressing to the next
593: * command or white-space.
594: * This also will advance the input stream.
595: */
596: static void
1.8 ! kristaps 597: texiword(struct texi *p, const char *buf,
! 598: size_t sz, size_t *pos, char extra)
1.1 kristaps 599: {
600:
1.3 kristaps 601: if (0 == p->outmacro && p->outcol > 72 && 0 == p->literal)
1.1 kristaps 602: texiputchar(p, '\n');
1.8 ! kristaps 603: /* FIXME: abstract this: we use it elsewhere. */
1.3 kristaps 604: if (p->seenws && p->outcol && 0 == p->literal)
1.1 kristaps 605: texiputchar(p, ' ');
606:
607: p->seenws = 0;
608:
609: while (*pos < sz && ! isspace(buf[*pos])) {
610: switch (buf[*pos]) {
611: case ('@'):
612: case ('}'):
613: case ('{'):
614: return;
615: }
1.8 ! kristaps 616: if ('\0' != extra && buf[*pos] == extra)
! 617: return;
1.1 kristaps 618: if (*pos < sz - 1 &&
619: '`' == buf[*pos] &&
620: '`' == buf[*pos + 1]) {
621: texiputchars(p, "\\(lq");
622: advance(p, buf, pos);
623: } else if (*pos < sz - 1 &&
624: '\'' == buf[*pos] &&
625: '\'' == buf[*pos + 1]) {
626: texiputchars(p, "\\(rq");
627: advance(p, buf, pos);
628: } else
629: texiputchar(p, buf[*pos]);
630: advance(p, buf, pos);
631: }
632: }
633:
1.8 ! kristaps 634: /*
! 635: * Look up the command at position "pos" in the buffer, returning it (or
! 636: * TEXICMD__MAX if none found) and setting "end" to be the absolute
! 637: * index after the command name.
! 638: */
1.1 kristaps 639: static enum texicmd
640: texicmd(struct texi *p, const char *buf,
641: size_t pos, size_t sz, size_t *end)
642: {
643: size_t i, len;
644:
645: assert('@' == buf[pos]);
1.3 kristaps 646:
647: if (++pos >= sz)
648: return(TEXICMD__MAX);
649:
650: /* Alphabetic commands are special. */
651: if ( ! isalpha(buf[pos])) {
652: *end = pos + 1;
653: for (i = 0; i < TEXICMD__MAX; i++) {
654: if (1 != texitoks[i].len)
655: continue;
656: if (0 == strncmp(texitoks[i].tok, &buf[pos], 1))
657: return(i);
658: }
659: texiwarn(p, "bad command: @%c", buf[pos]);
660: return(TEXICMD__MAX);
661: }
662:
663: for (*end = pos; *end < sz && ! isspace(buf[*end]); (*end)++)
664: if ((*end > pos && ('@' == buf[*end] ||
665: '{' == buf[*end] || '}' == buf[*end])))
1.1 kristaps 666: break;
667:
668: len = *end - pos;
669: for (i = 0; i < TEXICMD__MAX; i++) {
670: if (len != texitoks[i].len)
671: continue;
672: if (0 == strncmp(texitoks[i].tok, &buf[pos], len))
673: return(i);
674: }
675:
1.3 kristaps 676: texiwarn(p, "bad command: @%.*s", (int)len, &buf[pos]);
1.1 kristaps 677: return(TEXICMD__MAX);
678: }
679:
1.8 ! kristaps 680: /*
! 681: * Parse an argument from a bracketed command, e.g., @url{foo, baz}.
! 682: * Num should be set to the argument we're currently parsing, although
! 683: * it suffixes for it to be zero or non-zero.
! 684: * This will return 1 if there are more arguments, 0 otherwise.
! 685: * This will stop (returning 0) in the event of EOF or if we're not at a
! 686: * bracket for the zeroth parse.
! 687: */
! 688: static int
! 689: parsearg(struct texi *p, const char *buf,
! 690: size_t sz, size_t *pos, size_t num)
! 691: {
! 692: size_t end;
! 693: enum texicmd cmd;
! 694:
! 695: while (*pos < sz && isspace(buf[*pos]))
! 696: advance(p, buf, pos);
! 697: if (*pos == sz || (0 == num && '{' != buf[*pos]))
! 698: return(0);
! 699: if (0 == num)
! 700: advance(p, buf, pos);
! 701:
! 702: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
! 703: switch (buf[*pos]) {
! 704: case (','):
! 705: advance(p, buf, pos);
! 706: return(1);
! 707: case ('}'):
! 708: advance(p, buf, pos);
! 709: return(0);
! 710: case ('{'):
! 711: if (0 == p->ign)
! 712: texiwarn(p, "unexpected \"{\"");
! 713: advance(p, buf, pos);
! 714: continue;
! 715: case ('@'):
! 716: break;
! 717: default:
! 718: texiword(p, buf, sz, pos, ',');
! 719: continue;
! 720: }
! 721:
! 722: cmd = texicmd(p, buf, *pos, sz, &end);
! 723: advanceto(p, buf, pos, end);
! 724: if (TEXICMD__MAX == cmd)
! 725: continue;
! 726: if (NULL != texitoks[cmd].fp)
! 727: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
! 728: }
! 729: return(0);
! 730: }
! 731:
! 732: /*
! 733: * Parse until the end of a bracketed statement, e.g., @foo{bar baz}.
! 734: * This will stop in the event of EOF or if we're not at a bracket.
! 735: */
1.1 kristaps 736: static void
737: parsebracket(struct texi *p, const char *buf, size_t sz, size_t *pos)
738: {
739: size_t end;
740: enum texicmd cmd;
741:
1.3 kristaps 742: while (*pos < sz && isspace(buf[*pos]))
743: advance(p, buf, pos);
744:
1.1 kristaps 745: if (*pos == sz || '{' != buf[*pos])
746: return;
747: advance(p, buf, pos);
748:
749: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
750: switch (buf[*pos]) {
751: case ('}'):
752: advance(p, buf, pos);
753: return;
754: case ('{'):
1.3 kristaps 755: if (0 == p->ign)
756: texiwarn(p, "unexpected \"{\"");
757: advance(p, buf, pos);
758: continue;
759: case ('@'):
760: break;
761: default:
1.8 ! kristaps 762: texiword(p, buf, sz, pos, '\0');
1.3 kristaps 763: continue;
764: }
765:
766: cmd = texicmd(p, buf, *pos, sz, &end);
767: advanceto(p, buf, pos, end);
768: if (TEXICMD__MAX == cmd)
769: continue;
770: if (NULL != texitoks[cmd].fp)
771: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
772: }
773: }
774:
775: /*
776: * This should be invoked when we're on a macro line and want to process
777: * to the end of the current input line, doing all of our macros along
778: * the way.
779: */
780: static void
781: parseeoln(struct texi *p, const char *buf, size_t sz, size_t *pos)
782: {
783: size_t end;
784: enum texicmd cmd;
785:
786: assert(0 == p->literal);
787:
788: while (*pos < sz && '\n' != buf[*pos]) {
789: while (*pos < sz && isws(buf[*pos])) {
790: p->seenws = 1;
791: advance(p, buf, pos);
792: }
793: switch (buf[*pos]) {
794: case ('}'):
795: if (0 == p->ign)
796: texiwarn(p, "unexpected \"}\"");
797: advance(p, buf, pos);
798: continue;
799: case ('{'):
800: if (0 == p->ign)
801: texiwarn(p, "unexpected \"{\"");
1.1 kristaps 802: advance(p, buf, pos);
803: continue;
804: case ('@'):
805: break;
806: default:
1.8 ! kristaps 807: texiword(p, buf, sz, pos, '\0');
1.1 kristaps 808: continue;
809: }
810:
811: cmd = texicmd(p, buf, *pos, sz, &end);
812: advanceto(p, buf, pos, end);
813: if (TEXICMD__MAX == cmd)
814: continue;
815: if (NULL != texitoks[cmd].fp)
816: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
817: }
818: }
819:
1.8 ! kristaps 820: /*
! 821: * Parse a single word or command.
! 822: * This will return immediately at the EOF.
! 823: */
1.1 kristaps 824: static void
1.3 kristaps 825: parsesingle(struct texi *p, const char *buf, size_t sz, size_t *pos)
826: {
827: size_t end;
828: enum texicmd cmd;
829:
830: if ((*pos = advancenext(p, buf, sz, pos)) >= sz)
831: return;
832:
833: switch (buf[*pos]) {
834: case ('}'):
835: if (0 == p->ign)
836: texiwarn(p, "unexpected \"}\"");
837: advance(p, buf, pos);
838: return;
839: case ('{'):
840: if (0 == p->ign)
841: texiwarn(p, "unexpected \"{\"");
842: advance(p, buf, pos);
843: return;
844: case ('@'):
845: break;
846: default:
1.8 ! kristaps 847: texiword(p, buf, sz, pos, '\0');
1.3 kristaps 848: return;
849: }
850:
851: cmd = texicmd(p, buf, *pos, sz, &end);
852: advanceto(p, buf, pos, end);
853: if (TEXICMD__MAX == cmd)
854: return;
855: if (NULL != texitoks[cmd].fp)
856: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
857: }
858:
/*
 * Parse a whole buffer, one word or command at a time, from its
 * beginning until its end.
 */
static void
parseeof(struct texi *p, const char *buf, size_t sz)
{
	size_t	 cur;

	cur = 0;
	while (cur < sz)
		parsesingle(p, buf, sz, &cur);
}
870:
1.8 ! kristaps 871: /*
! 872: * Parse a block sequence until we have the "@end endtoken" command
! 873: * invocation.
! 874: * This will return immediately at EOF.
! 875: */
1.7 kristaps 876: static void
1.1 kristaps 877: parseto(struct texi *p, const char *buf,
878: size_t sz, size_t *pos, const char *endtoken)
879: {
880: size_t end;
881: enum texicmd cmd;
882: size_t endtoksz;
883:
884: endtoksz = strlen(endtoken);
885: assert(endtoksz > 0);
886:
887: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
888: switch (buf[*pos]) {
889: case ('}'):
1.3 kristaps 890: if (0 == p->ign)
891: texiwarn(p, "unexpected \"}\"");
1.1 kristaps 892: advance(p, buf, pos);
893: continue;
894: case ('{'):
1.3 kristaps 895: if (0 == p->ign)
896: texiwarn(p, "unexpected \"{\"");
1.1 kristaps 897: advance(p, buf, pos);
898: continue;
899: case ('@'):
900: break;
901: default:
1.8 ! kristaps 902: texiword(p, buf, sz, pos, '\0');
1.1 kristaps 903: continue;
904: }
905:
906: cmd = texicmd(p, buf, *pos, sz, &end);
907: advanceto(p, buf, pos, end);
908: if (TEXICMD_END == cmd) {
1.2 kristaps 909: while (*pos < sz && isws(buf[*pos]))
1.1 kristaps 910: advance(p, buf, pos);
911: /*
1.8 ! kristaps 912: * FIXME: check the full word, not just its
! 913: * initial substring!
1.1 kristaps 914: */
915: if (sz - *pos >= endtoksz && 0 == strncmp
916: (&buf[*pos], endtoken, endtoksz)) {
1.3 kristaps 917: advanceeoln(p, buf, sz, pos, 0);
1.1 kristaps 918: break;
919: }
1.3 kristaps 920: if (0 == p->ign)
921: texiwarn(p, "unexpected \"end\"");
922: advanceeoln(p, buf, sz, pos, 0);
1.1 kristaps 923: continue;
924: } else if (TEXICMD__MAX != cmd)
925: if (NULL != texitoks[cmd].fp)
926: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
927: }
928: }
929:
1.8 ! kristaps 930: /*
! 931: * Memory-map the file "fname" and begin parsing it.
! 932: * This can be called in a nested context.
! 933: */
1.1 kristaps 934: static void
1.2 kristaps 935: parsefile(struct texi *p, const char *fname)
936: {
937: struct texifile *f;
938: int fd;
939: struct stat st;
940:
941: assert(p->filepos < 64);
942: f = &p->files[p->filepos];
943: memset(f, 0, sizeof(struct texifile));
944:
945: f->name = fname;
946: if (-1 == (fd = open(fname, O_RDONLY, 0))) {
947: texiabort(p, fname);
948: } else if (-1 == fstat(fd, &st)) {
949: close(fd);
950: texiabort(p, fname);
951: }
952:
953: f->mapsz = st.st_size;
954: f->map = mmap(NULL, f->mapsz,
955: PROT_READ, MAP_SHARED, fd, 0);
956: close(fd);
957:
958: if (MAP_FAILED == f->map)
959: texiabort(p, fname);
960:
961: p->filepos++;
962: parseeof(p, f->map, f->mapsz);
963: texifilepop(p);
964: }
965:
966: static void
1.3 kristaps 967: dodeftypevar(struct texi *p, enum texicmd cmd,
968: const char *buf, size_t sz, size_t *pos)
969: {
970: const char *blk;
971:
972: blk = TEXICMD_DEFTYPEVR == cmd ?
973: "deftypevr" : "deftypevar";
974:
975: if (p->ign) {
976: parseto(p, buf, sz, pos, blk);
977: return;
978: }
979:
1.5 kristaps 980: teximacro(p, "Pp");
1.3 kristaps 981: if (TEXICMD_DEFTYPEVR == cmd) {
982: parsebracket(p, buf, sz, pos);
983: texiputchars(p, ":\n");
984: }
1.5 kristaps 985: teximacroopen(p, "Vt");
1.4 kristaps 986: parseeoln(p, buf, sz, pos);
1.3 kristaps 987: teximacroclose(p);
1.5 kristaps 988: teximacro(p, "Pp");
1.3 kristaps 989: parseto(p, buf, sz, pos, blk);
990: }
991:
992: static void
993: dodeftypefun(struct texi *p, enum texicmd cmd,
994: const char *buf, size_t sz, size_t *pos)
995: {
996: const char *blk;
997:
1.5 kristaps 998: blk = NULL;
1.3 kristaps 999: switch (cmd) {
1000: case (TEXICMD_DEFTYPEFN):
1001: case (TEXICMD_DEFTYPEFUN):
1.5 kristaps 1002: blk = texitoks[cmd].tok;
1.3 kristaps 1003: break;
1.5 kristaps 1004: default:
1.3 kristaps 1005: break;
1006: }
1007:
1008: if (p->ign) {
1009: if (NULL != blk)
1010: parseto(p, buf, sz, pos, blk);
1011: return;
1012: }
1013:
1014: switch (cmd) {
1015: case (TEXICMD_DEFTYPEFN):
1016: case (TEXICMD_DEFTYPEFUN):
1.5 kristaps 1017: teximacro(p, "Pp");
1.3 kristaps 1018: break;
1019: default:
1020: break;
1021: }
1022: if (TEXICMD_DEFTYPEFN == cmd ||
1023: TEXICMD_DEFTYPEFNX == cmd) {
1024: parsebracket(p, buf, sz, pos);
1025: texiputchars(p, ":\n");
1026: }
1.5 kristaps 1027: teximacroopen(p, "Ft");
1.3 kristaps 1028: parsesingle(p, buf, sz, pos);
1029: teximacroclose(p);
1.5 kristaps 1030: teximacroopen(p, "Fn");
1.3 kristaps 1031: parsesingle(p, buf, sz, pos);
1032: teximacroclose(p);
1.5 kristaps 1033: teximacroopen(p, "Li");
1.4 kristaps 1034: parseeoln(p, buf, sz, pos);
1.3 kristaps 1035: teximacroclose(p);
1.5 kristaps 1036: teximacro(p, "Pp");
1.3 kristaps 1037: if (NULL != blk)
1038: parseto(p, buf, sz, pos, blk);
1039: }
1040:
1041: static void
1.1 kristaps 1042: doignblock(struct texi *p, enum texicmd cmd,
1043: const char *buf, size_t sz, size_t *pos)
1044: {
1045:
1.3 kristaps 1046: p->ign++;
1.5 kristaps 1047: parseto(p, buf, sz, pos, texitoks[cmd].tok);
1.3 kristaps 1048: p->ign--;
1.1 kristaps 1049: }
1050:
1051: static void
1.3 kristaps 1052: doblock(struct texi *p, enum texicmd cmd,
1.1 kristaps 1053: const char *buf, size_t sz, size_t *pos)
1054: {
1055:
1.5 kristaps 1056: parseto(p, buf, sz, pos, texitoks[cmd].tok);
1.1 kristaps 1057: }
1058:
1059: static void
1060: doinline(struct texi *p, const char *buf,
1061: size_t sz, size_t *pos, const char *macro)
1062: {
1063:
1.5 kristaps 1064: teximacroopen(p, macro);
1.1 kristaps 1065: p->seenws = 0;
1066: parsebracket(p, buf, sz, pos);
1067: if (*pos < sz - 1 &&
1068: ismpunct(buf[*pos]) &&
1069: isspace(buf[*pos + 1])) {
1070: texiputchar(p, ' ');
1071: texiputchar(p, buf[*pos]);
1072: advance(p, buf, pos);
1073: }
1.5 kristaps 1074: teximacroclose(p);
1.1 kristaps 1075: }
1076:
1077: static void
1.2 kristaps 1078: doinclude(struct texi *p, enum texicmd cmd,
1079: const char *buf, size_t sz, size_t *pos)
1080: {
1081: char fname[PATH_MAX], path[PATH_MAX];
1082: size_t i;
1083: int rc;
1084:
1085: while (*pos < sz && ' ' == buf[*pos])
1086: advance(p, buf, pos);
1087:
1088: /* Read in the filename. */
1089: for (i = 0; *pos < sz && '\n' != buf[*pos]; i++) {
1090: if (i == sizeof(fname) - 1)
1091: break;
1092: fname[i] = buf[*pos];
1093: advance(p, buf, pos);
1094: }
1095:
1096: if (i == 0)
1097: texierr(p, "path too short");
1098: else if ('\n' != buf[*pos])
1099: texierr(p, "path too long");
1100: else if ('/' == fname[0])
1101: texierr(p, "no absolute paths");
1102: fname[i] = '\0';
1103:
1104: if (strstr(fname, "../") || strstr(fname, "/.."))
1105: texierr(p, "insecure path");
1106:
1.5 kristaps 1107: for (i = 0; i < p->dirsz; i++) {
1108: rc = snprintf(path, sizeof(path),
1109: "%s/%s", p->dirs[i], fname);
1110: if (rc < 0)
1111: texierr(p, "couldn't format path");
1112: else if ((size_t)rc >= sizeof(path))
1113: texierr(p, "path too long");
1114: else if (-1 == access(path, R_OK))
1115: continue;
1116:
1117: parsefile(p, path);
1118: return;
1119: }
1.2 kristaps 1120:
1.5 kristaps 1121: texierr(p, "couldn't find %s in includes", fname);
1.2 kristaps 1122: }
1123:
1124: static void
1.1 kristaps 1125: doitalic(struct texi *p, enum texicmd cmd,
1126: const char *buf, size_t sz, size_t *pos)
1127: {
1128:
1129: texiputchars(p, "\\fI");
1130: parsebracket(p, buf, sz, pos);
1131: texiputchars(p, "\\fP");
1132: }
1133:
1134: static void
1.3 kristaps 1135: doenv(struct texi *p, enum texicmd cmd,
1136: const char *buf, size_t sz, size_t *pos)
1137: {
1138:
1139: if (p->literal)
1140: parsebracket(p, buf, sz, pos);
1141: else
1142: doinline(p, buf, sz, pos, "Ev");
1143: }
1144:
1145: static void
1.1 kristaps 1146: doliteral(struct texi *p, enum texicmd cmd,
1147: const char *buf, size_t sz, size_t *pos)
1148: {
1149:
1.3 kristaps 1150: if (p->literal)
1.1 kristaps 1151: parsebracket(p, buf, sz, pos);
1152: else
1153: doinline(p, buf, sz, pos, "Li");
1154: }
1155:
1156: static void
1157: doemph(struct texi *p, enum texicmd cmd,
1158: const char *buf, size_t sz, size_t *pos)
1159: {
1160:
1.3 kristaps 1161: if (p->literal)
1.1 kristaps 1162: doitalic(p, cmd, buf, sz, pos);
1163: else
1164: doinline(p, buf, sz, pos, "Em");
1165: }
1166:
1167: static void
1168: docommand(struct texi *p, enum texicmd cmd,
1169: const char *buf, size_t sz, size_t *pos)
1170: {
1171:
1172: doinline(p, buf, sz, pos, "Xr");
1173: }
1174:
1175: static void
1176: dobracket(struct texi *p, enum texicmd cmd,
1177: const char *buf, size_t sz, size_t *pos)
1178: {
1179:
1180: parsebracket(p, buf, sz, pos);
1181: }
1182:
1183: static void
1184: dofile(struct texi *p, enum texicmd cmd,
1185: const char *buf, size_t sz, size_t *pos)
1186: {
1187:
1.3 kristaps 1188: if (p->literal)
1.1 kristaps 1189: parsebracket(p, buf, sz, pos);
1190: else
1191: doinline(p, buf, sz, pos, "Pa");
1192: }
1193:
1194: static void
1.3 kristaps 1195: dodisplay(struct texi *p, enum texicmd cmd,
1196: const char *buf, size_t sz, size_t *pos)
1197: {
1198:
1.5 kristaps 1199: teximacro(p, "Bd -display -offset indent");
1.3 kristaps 1200: advanceeoln(p, buf, sz, pos, 1);
1201: parseto(p, buf, sz, pos, "display");
1.5 kristaps 1202: teximacro(p, "Ed");
1.3 kristaps 1203: }
1204:
1205: static void
1.1 kristaps 1206: doexample(struct texi *p, enum texicmd cmd,
1207: const char *buf, size_t sz, size_t *pos)
1208: {
1.3 kristaps 1209: const char *blk;
1210:
1211: blk = TEXICMD_EXAMPLE == cmd ? "example" : "smallexample";
1.1 kristaps 1212:
1.5 kristaps 1213: teximacro(p, "Bd -literal -offset indent");
1.3 kristaps 1214: advanceeoln(p, buf, sz, pos, 1);
1215: p->literal++;
1216: parseto(p, buf, sz, pos, blk);
1217: p->literal--;
1.5 kristaps 1218: teximacro(p, "Ed");
1.1 kristaps 1219: }
1220:
1221: static void
1222: dobye(struct texi *p, enum texicmd cmd,
1223: const char *buf, size_t sz, size_t *pos)
1224: {
1225:
1226: texiexit(p);
1227: exit(EXIT_SUCCESS);
1228: }
1229:
1230: static void
1231: dosymbol(struct texi *p, enum texicmd cmd,
1232: const char *buf, size_t sz, size_t *pos)
1233: {
1234:
1.3 kristaps 1235: if (p->seenws && p->outcol && 0 == p->literal) {
1236: texiputchar(p, ' ');
1237: p->seenws = 0;
1238: }
1239:
1.1 kristaps 1240: switch (cmd) {
1.3 kristaps 1241: case (TEXICMD_ASTERISK):
1242: case (TEXICMD_NEWLINE):
1243: case (TEXICMD_SPACE):
1244: case (TEXICMD_TAB):
1245: texiputchar(p, ' ');
1246: break;
1.1 kristaps 1247: case (TEXICMD_AT):
1.3 kristaps 1248: texiputchar(p, '@');
1249: break;
1250: case (TEXICMD_BANG):
1251: texiputchar(p, '!');
1.7 kristaps 1252: break;
1253: case (TEXICMD_BULLET):
1254: texiputchars(p, "\\(bu");
1.1 kristaps 1255: break;
1256: case (TEXICMD_COPYRIGHT):
1257: texiputchars(p, "\\(co");
1258: break;
1.2 kristaps 1259: case (TEXICMD_DOTS):
1260: texiputchars(p, "...");
1261: break;
1.1 kristaps 1262: case (TEXICMD_LATEX):
1263: texiputchars(p, "LaTeX");
1264: break;
1.3 kristaps 1265: case (TEXICMD_QUESTIONMARK):
1266: texiputchar(p, '?');
1267: break;
1268: case (TEXICMD_SQUIGGLE_LEFT):
1269: texiputchars(p, "{");
1270: break;
1271: case (TEXICMD_SQUIGGLE_RIGHT):
1272: texiputchars(p, "}");
1273: break;
1.1 kristaps 1274: case (TEXICMD_TEXSYM):
1275: texiputchars(p, "TeX");
1276: break;
1.3 kristaps 1277: case (TEXICMD_COLON):
1278: case (TEXICMD_HYPHEN):
1279: break;
1.1 kristaps 1280: default:
1.5 kristaps 1281: texiwarn(p, "sym: %d", cmd);
1.1 kristaps 1282: abort();
1283: }
1284:
1.5 kristaps 1285: if (texitoks[cmd].len > 1)
1286: doignbracket(p, cmd, buf, sz, pos);
1.1 kristaps 1287: }
1288:
1289: static void
1290: doquotation(struct texi *p, enum texicmd cmd,
1291: const char *buf, size_t sz, size_t *pos)
1292: {
1293:
1.5 kristaps 1294: teximacro(p, "Qo");
1.1 kristaps 1295: parseto(p, buf, sz, pos, "quotation");
1.5 kristaps 1296: teximacro(p, "Qc");
1.1 kristaps 1297: }
1298:
1.3 kristaps 1299: static void
1300: domath(struct texi *p, enum texicmd cmd,
1301: const char *buf, size_t sz, size_t *pos)
1302: {
1303: size_t nest;
1304:
1305: /*
1306: * Math handling is different from everything else.
1307: * We don't allow any subcomponents, and we ignore the rules in
1308: * terms of @-commands.
1309: * This departs from GNU's rules, but whatever.
1310: */
1311: while (*pos < sz && isws(buf[*pos]))
1312: advance(p, buf, pos);
1313: if (*pos == sz || '{' != buf[*pos])
1314: return;
1315: advance(p, buf, pos);
1316: if (p->seenws && p->outcol && 0 == p->literal)
1317: texiputchar(p, ' ');
1318: p->seenws = 0;
1319: for (nest = 1; *pos < sz && nest > 0; ) {
1320: if ('{' == buf[*pos])
1321: nest++;
1322: else if ('}' == buf[*pos])
1323: if (0 == --nest)
1324: continue;
1325: texiputchar(p, buf[*pos]);
1326: advance(p, buf, pos);
1327: }
1328: if (*pos == sz)
1329: return;
1330: assert('}' == buf[*pos]);
1331: advance(p, buf, pos);
1332: }
1333:
1.1 kristaps 1334: static void
1.8 ! kristaps 1335: dolink(struct texi *p, enum texicmd cmd,
1.1 kristaps 1336: const char *buf, size_t sz, size_t *pos)
1337: {
1.8 ! kristaps 1338: int c;
1.1 kristaps 1339:
1340: switch (cmd) {
1341: case (TEXICMD_EMAIL):
1.5 kristaps 1342: teximacroopen(p, "Mt");
1.1 kristaps 1343: break;
1.3 kristaps 1344: case (TEXICMD_UREF):
1.1 kristaps 1345: case (TEXICMD_URL):
1.5 kristaps 1346: teximacroopen(p, "Lk");
1.1 kristaps 1347: break;
1.8 ! kristaps 1348: case (TEXICMD_XREF):
! 1349: texiputchars(p, "See Section");
! 1350: teximacroopen(p, "Qq");
! 1351: break;
! 1352: case (TEXICMD_PXREF):
! 1353: texiputchars(p, "see Section");
! 1354: teximacroopen(p, "Qq");
! 1355: break;
1.1 kristaps 1356: default:
1.8 ! kristaps 1357: abort();
1.1 kristaps 1358: }
1.8 ! kristaps 1359:
! 1360: c = parsearg(p, buf, sz, pos, 0);
! 1361: p->ign++;
! 1362: while (c > 0)
! 1363: c = parsearg(p, buf, sz, pos, 1);
! 1364: p->ign--;
! 1365:
1.1 kristaps 1366: if (*pos < sz - 1 &&
1367: ismpunct(buf[*pos]) &&
1368: isspace(buf[*pos + 1])) {
1369: texiputchar(p, ' ');
1370: texiputchar(p, buf[*pos]);
1371: advance(p, buf, pos);
1372: }
1.8 ! kristaps 1373:
! 1374: teximacroclose(p);
! 1375: }
! 1376:
! 1377: static void
! 1378: doignargn(struct texi *p, enum texicmd cmd,
! 1379: const char *buf, size_t sz, size_t *pos)
! 1380: {
! 1381: int c;
! 1382:
! 1383: c = parsearg(p, buf, sz, pos, 0);
! 1384: p->ign++;
! 1385: while (c > 0)
! 1386: c = parsearg(p, buf, sz, pos, 1);
! 1387: p->ign--;
1.1 kristaps 1388: }
1389:
1390: static void
1391: dosubsection(struct texi *p, enum texicmd cmd,
1392: const char *buf, size_t sz, size_t *pos)
1393: {
1394:
1.5 kristaps 1395: teximacro(p, "Pp");
1396: teximacroopen(p, "Em");
1.3 kristaps 1397: parseeoln(p, buf, sz, pos);
1.5 kristaps 1398: teximacroclose(p);
1399: teximacro(p, "Pp");
1.1 kristaps 1400: }
1401:
1402: static void
1403: dosection(struct texi *p, enum texicmd cmd,
1404: const char *buf, size_t sz, size_t *pos)
1405: {
1406:
1.3 kristaps 1407: if (p->outmacro)
1408: texierr(p, "subsection in open line scope!?");
1409: else if (p->literal)
1410: texierr(p, "subsection in a literal scope!?");
1411:
1.5 kristaps 1412: teximacroopen(p, "Ss");
1.3 kristaps 1413: parseeoln(p, buf, sz, pos);
1414: teximacroclose(p);
1415: }
1416:
1417: static void
1418: dosp(struct texi *p, enum texicmd cmd,
1419: const char *buf, size_t sz, size_t *pos)
1420: {
1421:
1.5 kristaps 1422: teximacro(p, "Pp");
1.3 kristaps 1423: advanceeoln(p, buf, sz, pos, 1);
1.1 kristaps 1424: }
1425:
1426: static void
1.3 kristaps 1427: dochapter(struct texi *p, enum texicmd cmd,
1.1 kristaps 1428: const char *buf, size_t sz, size_t *pos)
1429: {
1430:
1.3 kristaps 1431: if (p->outmacro)
1432: texierr(p, "section in open line scope!?");
1433: else if (p->literal)
1434: texierr(p, "section in a literal scope!?");
1435:
1.5 kristaps 1436: teximacroopen(p, "Sh");
1.3 kristaps 1437: parseeoln(p, buf, sz, pos);
1438: teximacroclose(p);
1.1 kristaps 1439: }
1440:
1441: static void
1442: dotop(struct texi *p, enum texicmd cmd,
1443: const char *buf, size_t sz, size_t *pos)
1444: {
1445:
1.3 kristaps 1446: p->ign--;
1447: advanceeoln(p, buf, sz, pos, 1);
1.6 kristaps 1448: teximacro(p, "Dd $Mdocdate: February 18 2015 $");
1.5 kristaps 1449: teximacro(p, "Dt SOMETHING 7");
1450: teximacro(p, "Os");
1451: teximacro(p, "Sh NAME");
1452: teximacro(p, "Nm Something");
1453: teximacro(p, "Nd Something");
1.1 kristaps 1454: }
1455:
1456: static void
1457: doitem(struct texi *p, enum texicmd cmd,
1458: const char *buf, size_t sz, size_t *pos)
1459: {
1460:
1.3 kristaps 1461: if (p->outmacro)
1462: texierr(p, "item in open line scope!?");
1463: else if (p->literal)
1464: texierr(p, "item in a literal scope!?");
1465:
1466: switch (p->list) {
1467: case (TEXILIST_ITEM):
1.5 kristaps 1468: teximacroopen(p, "It");
1.3 kristaps 1469: break;
1470: case (TEXILIST_NOITEM):
1.5 kristaps 1471: teximacro(p, "It");
1.3 kristaps 1472: break;
1473: default:
1.5 kristaps 1474: teximacro(p, "Pp");
1.3 kristaps 1475: break;
1476: }
1477:
1478: parseeoln(p, buf, sz, pos);
1.1 kristaps 1479:
1.3 kristaps 1480: if (TEXILIST_ITEM == p->list)
1481: teximacroclose(p);
1482: else
1.1 kristaps 1483: texiputchar(p, '\n');
1484: }
1485:
1486: static void
1487: dotable(struct texi *p, enum texicmd cmd,
1488: const char *buf, size_t sz, size_t *pos)
1489: {
1.3 kristaps 1490: enum texilist sv = p->list;
1491:
1492: p->list = TEXILIST_ITEM;
1.5 kristaps 1493: teximacro(p, "Bl -tag -width Ds");
1.1 kristaps 1494: parseto(p, buf, sz, pos, "table");
1.5 kristaps 1495: teximacro(p, "El");
1.3 kristaps 1496: p->list = sv;
1.1 kristaps 1497: }
1498:
1499: static void
1.2 kristaps 1500: doenumerate(struct texi *p, enum texicmd cmd,
1501: const char *buf, size_t sz, size_t *pos)
1502: {
1.3 kristaps 1503: enum texilist sv = p->list;
1.2 kristaps 1504:
1.3 kristaps 1505: p->list = TEXILIST_NOITEM;
1.5 kristaps 1506: teximacro(p, "Bl -enum");
1.2 kristaps 1507: parseto(p, buf, sz, pos, "enumerate");
1.5 kristaps 1508: teximacro(p, "El");
1.3 kristaps 1509: p->list = sv;
1.2 kristaps 1510: }
1511:
1512: static void
1.1 kristaps 1513: doitemize(struct texi *p, enum texicmd cmd,
1514: const char *buf, size_t sz, size_t *pos)
1515: {
1.3 kristaps 1516: enum texilist sv = p->list;
1.1 kristaps 1517:
1.3 kristaps 1518: p->list = TEXILIST_ITEM;
1.5 kristaps 1519: teximacro(p, "Bl -bullet");
1.1 kristaps 1520: parseto(p, buf, sz, pos, "itemize");
1.5 kristaps 1521: teximacro(p, "El");
1.3 kristaps 1522: p->list = sv;
1.1 kristaps 1523: }
1524:
1525: static void
1526: doignbracket(struct texi *p, enum texicmd cmd,
1527: const char *buf, size_t sz, size_t *pos)
1528: {
1529:
1.3 kristaps 1530: p->ign++;
1.1 kristaps 1531: parsebracket(p, buf, sz, pos);
1.3 kristaps 1532: p->ign--;
1.1 kristaps 1533: }
1534:
1535: static void
1536: doignline(struct texi *p, enum texicmd cmd,
1537: const char *buf, size_t sz, size_t *pos)
1538: {
1539:
1.3 kristaps 1540: advanceeoln(p, buf, sz, pos, 1);
1.1 kristaps 1541: }
1542:
/*
 * Build an array of directory names: a copy of "base" in the first
 * slot, followed by a copy of each colon-separated component of "cp"
 * (if "cp" is not NULL), empty components included.
 * The number of entries is stored in "sz".
 * Every entry, and the array itself, is heap-allocated; allocation
 * failure prints a message and exits.
 * This does NOT sanitise the directories!
 */
static char **
parsedirs(const char *base, const char *cp, size_t *sz)
{
	char		**dirs;
	char		 *copy, *rest, *tok;
	const char	 *c;
	size_t		  n;

	/* One slot for "base", one per component of "cp". */
	*sz = 1;
	if (cp != NULL) {
		(*sz)++;
		for (c = cp; *c != '\0'; c++)
			if (*c == ':')
				(*sz)++;
	}

	if ((dirs = calloc(*sz, sizeof(char *))) == NULL) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}
	if ((dirs[0] = strdup(base)) == NULL) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	if (cp == NULL)
		return(dirs);

	/* strsep() writes into its input, so split a private copy. */
	if ((copy = strdup(cp)) == NULL) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	rest = copy;
	n = 1;
	while ((tok = strsep(&rest, ":")) != NULL) {
		if ((dirs[n] = strdup(tok)) == NULL) {
			perror(NULL);
			exit(EXIT_FAILURE);
		}
		n++;
	}

	free(copy);
	return(dirs);
}
1588:
1.1 kristaps 1589: int
1590: main(int argc, char *argv[])
1591: {
1592: struct texi texi;
1.2 kristaps 1593: int c;
1594: char *path, *dir;
1.5 kristaps 1595: const char *progname, *Idir;
1.1 kristaps 1596:
1597: progname = strrchr(argv[0], '/');
1598: if (progname == NULL)
1599: progname = argv[0];
1600: else
1601: ++progname;
1602:
1.5 kristaps 1603: Idir = NULL;
1604: while (-1 != (c = getopt(argc, argv, "I:")))
1.1 kristaps 1605: switch (c) {
1.5 kristaps 1606: case ('I'):
1607: Idir = optarg;
1608: break;
1.1 kristaps 1609: default:
1610: goto usage;
1611: }
1612:
1613: argv += optind;
1614: if (0 == (argc -= optind))
1615: goto usage;
1616:
1.2 kristaps 1617: if (NULL == (path = strdup(argv[0]))) {
1618: perror(NULL);
1619: exit(EXIT_FAILURE);
1620: } else if (NULL == (dir = dirname(path))) {
1621: perror(argv[0]);
1622: free(path);
1623: exit(EXIT_FAILURE);
1624: }
1625: free(path);
1626:
1.1 kristaps 1627: memset(&texi, 0, sizeof(struct texi));
1.3 kristaps 1628: texi.ign = 1;
1.5 kristaps 1629: texi.dirs = parsedirs(dir, Idir, &texi.dirsz);
1.2 kristaps 1630: parsefile(&texi, argv[0]);
1.5 kristaps 1631: /* We shouldn't get here. */
1.2 kristaps 1632: texiexit(&texi);
1633: return(EXIT_FAILURE);
1.1 kristaps 1634: usage:
1.8 ! kristaps 1635: fprintf(stderr, "usage: %s [-Idirs] file\n", progname);
1.1 kristaps 1636: return(EXIT_FAILURE);
1637: }
CVSweb