Annotation of texi2mdoc/main.c, Revision 1.9
1.9 ! kristaps 1: /* $Id: main.c,v 1.8 2015/02/18 12:03:21 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2015 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/mman.h>
18: #include <sys/stat.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <getopt.h>
1.2 kristaps 24: #include <libgen.h>
25: #include <limits.h>
1.1 kristaps 26: #include <stdarg.h>
27: #include <stdio.h>
28: #include <stdlib.h>
29: #include <string.h>
1.6 kristaps 30: #include <unistd.h>
1.1 kristaps 31:
/*
 * Every native Texinfo command that the parser understands.
 * The enumerators are used as indices into texitoks[], so both must be
 * kept in the same order.
 * TEXICMD__MAX counts the commands and is also what texicmd() returns
 * when no command was recognised.
 * Overridden (user-defined) names are not handled here.
 */
enum texicmd {
	TEXICMD_ACRONYM = 0,
	TEXICMD_A4PAPER,
	TEXICMD_ANCHOR,
	TEXICMD_APPENDIX,
	TEXICMD_APPENDIXSEC,
	TEXICMD_ASTERISK,
	TEXICMD_AT,
	TEXICMD_AUTHOR,
	TEXICMD_BANG,
	TEXICMD_BULLET,
	TEXICMD_BYE,
	TEXICMD_CENTER,
	TEXICMD_CHAPTER,
	TEXICMD_CINDEX,
	TEXICMD_CITE,
	TEXICMD_CODE,
	TEXICMD_COLON,
	TEXICMD_COMMAND,
	TEXICMD_COMMENT,
	TEXICMD_COMMENT_LONG,
	TEXICMD_CONTENTS,
	TEXICMD_COPYING,
	TEXICMD_COPYRIGHT,
	TEXICMD_DEFTYPEFN,
	TEXICMD_DEFTYPEFNX,
	TEXICMD_DEFTYPEFUN,
	TEXICMD_DEFTYPEFUNX,
	TEXICMD_DEFTYPEVAR,
	TEXICMD_DEFTYPEVR,
	TEXICMD_DETAILMENU,
	TEXICMD_DFN,
	TEXICMD_DIRCATEGORY,
	TEXICMD_DIRENTRY,
	TEXICMD_DISPLAY,
	TEXICMD_DOTS,
	TEXICMD_EMAIL,
	TEXICMD_EMPH,
	TEXICMD_END,
	TEXICMD_ENUMERATE,
	TEXICMD_ENV,
	TEXICMD_EXAMPLE,
	TEXICMD_FILE,
	TEXICMD_GROUP,
	TEXICMD_HEADING,
	TEXICMD_HEADINGS,
	TEXICMD_HYPHEN,
	TEXICMD_I,
	TEXICMD_IFCLEAR,
	TEXICMD_IFHTML,
	TEXICMD_IFINFO,
	TEXICMD_IFNOTTEX,
	TEXICMD_IFTEX,
	TEXICMD_IFSET,
	TEXICMD_IMAGE,
	TEXICMD_INCLUDE,
	TEXICMD_INSERTCOPYING,
	TEXICMD_ITEM,
	TEXICMD_ITEMIZE,
	TEXICMD_KBD,
	TEXICMD_LATEX,
	TEXICMD_MATH,
	TEXICMD_MENU,
	TEXICMD_NEWLINE,
	TEXICMD_NODE,
	TEXICMD_NOINDENT,
	TEXICMD_PXREF,
	TEXICMD_QUESTIONMARK,
	TEXICMD_QUOTATION,
	TEXICMD_PAGE,
	TEXICMD_PARINDENT,
	TEXICMD_PRINTINDEX,
	TEXICMD_REF,
	TEXICMD_SAMP,
	TEXICMD_SC,
	TEXICMD_SECTION,
	TEXICMD_SET,
	TEXICMD_SETCHAPNEWPAGE,
	TEXICMD_SETFILENAME,
	TEXICMD_SETTITLE,
	TEXICMD_SP,
	TEXICMD_SPACE,
	TEXICMD_SMALLEXAMPLE,
	TEXICMD_SQUIGGLE_LEFT,
	TEXICMD_SQUIGGLE_RIGHT,
	TEXICMD_STRONG,
	TEXICMD_SUBSECTION,
	TEXICMD_SUBTITLE,
	TEXICMD_TAB,
	TEXICMD_TABLE,
	TEXICMD_TEX,
	TEXICMD_TEXSYM,
	TEXICMD_TITLE,
	TEXICMD_TITLEFONT,
	TEXICMD_TITLEPAGE,
	TEXICMD_TOP,
	TEXICMD_UNNUMBERED,
	TEXICMD_UNNUMBEREDSEC,
	TEXICMD_UREF,
	TEXICMD_URL,
	TEXICMD_VAR,
	TEXICMD_VSKIP,
	TEXICMD_W,
	TEXICMD_XREF,
	TEXICMD__MAX
};
143:
/*
 * One input file on the parse stack, together with the parser's
 * current position inside of it.
 */
struct texifile {
	const char	*name;	/* name of the file */
	size_t		 line;	/* current line, counted from zero */
	size_t		 col;	/* current column in line, counted from zero */
	char		*map;	/* the mmap(2)ed contents of the file */
	size_t		 mapsz;	/* size of the mapping */
};
155:
struct texi;

/*
 * Type of the functions implementing Texinfo commands.
 * A handler is invoked with the parse state, the command that was
 * looked up, the input buffer, the size of that buffer, and a pointer
 * to the current position within the buffer.
 */
typedef void (*texicmdfp)(struct texi *, enum texicmd,
	const char *, size_t, size_t *);
163:
164: /*
165: * Describes Texinfo commands, whether native or overriden.
166: */
167: struct texitok {
168: texicmdfp fp; /* callback (or NULL if none) */
169: const char *tok; /* name of the token */
170: size_t len; /* strlen(tok) */
171: };
172:
/*
 * Kind of list recorded in the "list" member of struct texi.
 * TEXILIST_NONE is zero, so a zeroed parse state starts out with it.
 */
enum texilist {
	TEXILIST_NONE = 0,
	TEXILIST_ITEM,
	TEXILIST_NOITEM
};
178:
1.1 kristaps 179: /*
180: * The main parse structure.
181: * This keeps any necessary information handy.
182: */
183: struct texi {
1.5 kristaps 184: struct texifile files[64]; /* stack of open files */
185: size_t filepos; /* number of open files */
186: size_t outcol; /* column in output line */
187: char **dirs; /* texi directories */
188: size_t dirsz; /* number of texi directories */
1.8 kristaps 189: enum texilist list; /* current list (set recursively) */
190: int outmacro; /* if >0, output is in line macro */
191: int seenws; /* ws has been seen (and ignored) */
192: int ign; /* if >0, don't print anything */
193: int literal; /* if >0, literal context */
1.1 kristaps 194: };
195:
/*
 * Character classes used by the parser.
 * These are macros and evaluate their argument more than once, so the
 * argument must not have side effects.
 */

/*
 * Period, comma or semicolon.
 * FIXME: find a better way.
 */
#define	ismpunct(_x) \
	('.' == (_x) || \
	 ',' == (_x) || \
	 ';' == (_x))

/* Blanks within a line: Texinfo disregards spaces and tabs. */
#define	isws(_x) \
	(' ' == (_x) || '\t' == (_x))

/*
 * Blanks and the newline.
 * The newline test has to sit outside of the isws() invocation:
 * passing "x || '\n' == x" into isws() would compare a 0/1 truth value
 * against ' ' and '\t', which never matches.
 */
#define	ismspace(_x) \
	(isws(_x) || '\n' == (_x))
1.1 kristaps 206:
/*
 * Command handlers referenced from texitoks[], in alphabetical order.
 * All of them have the texicmdfp signature.
 */
static	void doblock(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dobracket(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dobye(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dochapter(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void docommand(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dodeftypefun(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dodeftypevar(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dodisplay(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doemph(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doenumerate(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doenv(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doexample(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dofile(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doignargn(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doignblock(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doignbracket(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doignline(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doinclude(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doitalic(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doitem(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doitemize(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dolink(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doliteral(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void domath(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doquotation(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dosection(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dosp(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dosubsection(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dosymbol(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dotable(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dotop(struct texi *, enum texicmd, const char *, size_t, size_t *);
238:
239: static const struct texitok texitoks[TEXICMD__MAX] = {
1.8 kristaps 240: { doignargn, "acronym", 7 }, /* TEXICMD_ACRONYM */
1.1 kristaps 241: { doignline, "afourpaper", 10 }, /* TEXICMD_A4PAPER */
242: { doignbracket, "anchor", 6 }, /* TEXICMD_ANCHOR */
1.3 kristaps 243: { dochapter, "appendix", 8 }, /* TEXICMD_APPENDIX */
244: { dochapter, "appendixsec", 11 }, /* TEXICMD_APPENDIXSEC */
245: { dosymbol, "*", 1 }, /* TEXICMD_ASTERISK */
1.1 kristaps 246: { dosymbol, "@", 1 }, /* TEXICMD_AT */
1.3 kristaps 247: { doignline, "author", 6 }, /* TEXICMD_AUTHOR */
248: { dosymbol, "!", 1 }, /* TEXICMD_BANG */
1.7 kristaps 249: { dosymbol, "bullet", 6 }, /* TEXICMD_BULLET */
1.1 kristaps 250: { dobye, "bye", 3 }, /* TEXICMD_BYE */
1.5 kristaps 251: { doignline, "center", 5 }, /* TEXICMD_CENTER */
1.3 kristaps 252: { dochapter, "chapter", 7 }, /* TEXICMD_CHAPTER */
1.1 kristaps 253: { doignline, "cindex", 6 }, /* TEXICMD_CINDEX */
254: { doliteral, "code", 4 }, /* TEXICMD_CODE */
1.3 kristaps 255: { doitalic, "cite", 4 }, /* TEXICMD_CITE */
256: { dosymbol, ":", 1 }, /* TEXICMD_COLON */
1.1 kristaps 257: { docommand, "command", 7 }, /* TEXICMD_COMMAND */
258: { doignline, "c", 1 }, /* TEXICMD_COMMENT */
1.2 kristaps 259: { doignline, "comment", 7 }, /* TEXICMD_COMMENT_LONG */
1.1 kristaps 260: { doignline, "contents", 8 }, /* TEXICMD_CONTENTS */
261: { doignblock, "copying", 7 }, /* TEXICMD_COPYING */
262: { dosymbol, "copyright", 9 }, /* TEXICMD_COPYRIGHT */
1.3 kristaps 263: { dodeftypefun, "deftypefn", 9 }, /* TEXICMD_DEFTYPEFN */
264: { dodeftypefun, "deftypefnx", 10 }, /* TEXICMD_DEFTYPEFNX */
265: { dodeftypefun, "deftypefun", 10 }, /* TEXICMD_DEFTYPEFUN */
266: { dodeftypefun, "deftypefunx", 11 }, /* TEXICMD_DEFTYPEFUNX */
267: { dodeftypevar, "deftypevar", 10 }, /* TEXICMD_DEFTYPEVAR */
268: { dodeftypevar, "deftypevr", 9 }, /* TEXICMD_DEFTYPEVR */
1.1 kristaps 269: { doignblock, "detailmenu", 10 }, /* TEXICMD_DETAILMENU */
1.3 kristaps 270: { doitalic, "dfn", 3 }, /* TEXICMD_DFN */
1.1 kristaps 271: { doignline, "dircategory", 11 }, /* TEXICMD_DIRCATEGORY */
272: { doignblock, "direntry", 8 }, /* TEXICMD_DIRENTRY */
1.3 kristaps 273: { dodisplay, "display", 7 }, /* TEXICMD_DISPLAY */
1.2 kristaps 274: { dosymbol, "dots", 4 }, /* TEXICMD_DOTS */
1.8 kristaps 275: { dolink, "email", 5 }, /* TEXICMD_EMAIL */
1.1 kristaps 276: { doemph, "emph", 4 }, /* TEXICMD_EMPH */
277: { NULL, "end", 3 }, /* TEXICMD_END */
1.2 kristaps 278: { doenumerate, "enumerate", 9 }, /* TEXICMD_ENUMERATE */
1.3 kristaps 279: { doenv, "env", 3 }, /* TEXICMD_ENV */
1.1 kristaps 280: { doexample, "example", 7 }, /* TEXICMD_EXAMPLE */
281: { dofile, "file", 4 }, /* TEXICMD_FILE */
1.3 kristaps 282: { doblock, "group", 5 }, /* TEXICMD_GROUP */
1.2 kristaps 283: { dosection, "heading", 7 }, /* TEXICMD_HEADING */
1.3 kristaps 284: { doignline, "headings", 8 }, /* TEXICMD_HEADINGS */
285: { dosymbol, "-", 1 }, /* TEXICMD_HYPHEN */
1.1 kristaps 286: { doitalic, "i", 1 }, /* TEXICMD_I */
1.3 kristaps 287: { doignblock, "ifclear", 7 }, /* TEXICMD_IFCLEAR */
1.1 kristaps 288: { doignblock, "ifhtml", 6 }, /* TEXICMD_IFHTML */
1.3 kristaps 289: { doignblock, "ifinfo", 6 }, /* TEXICMD_IFINFO */
290: { doblock, "ifnottex", 8 }, /* TEXICMD_IFNOTTEX */
1.1 kristaps 291: { doignblock, "iftex", 5 }, /* TEXICMD_IFTEX */
1.3 kristaps 292: { doignblock, "ifset", 5 }, /* TEXICMD_IFSET */
1.1 kristaps 293: { doignbracket, "image", 5 }, /* TEXICMD_IMAGE */
1.2 kristaps 294: { doinclude, "include", 7 }, /* TEXICMD_INCLUDE */
1.5 kristaps 295: { doignline, "insertcopying", 13 }, /* TEXICMD_INSERTCOPYING */
1.1 kristaps 296: { doitem, "item", 4 }, /* TEXICMD_ITEM */
297: { doitemize, "itemize", 7 }, /* TEXICMD_ITEMIZE */
298: { doliteral, "kbd", 3 }, /* TEXICMD_KBD */
299: { dosymbol, "LaTeX", 5 }, /* TEXICMD_LATEX */
1.3 kristaps 300: { domath, "math", 4 }, /* TEXICMD_MATH */
1.1 kristaps 301: { doignblock, "menu", 4 }, /* TEXICMD_MENU */
1.3 kristaps 302: { dosymbol, "\n", 1 }, /* TEXICMD_NEWLINE */
1.1 kristaps 303: { doignline, "node", 4 }, /* TEXICMD_NODE */
1.3 kristaps 304: { doignline, "noindent", 8 }, /* TEXICMD_NOINDENT */
1.8 kristaps 305: { dolink, "pxref", 5 }, /* TEXICMD_PXREF */
1.3 kristaps 306: { dosymbol, "?", 1 }, /* TEXICMD_QUESTIONMARK */
1.1 kristaps 307: { doquotation, "quotation", 9 }, /* TEXICMD_QUOTATION */
1.3 kristaps 308: { doignline, "page", 4 }, /* TEXICMD_PAGE */
309: { doignline, "paragraphindent", 14 }, /* TEXICMD_PARINDENT */
1.2 kristaps 310: { doignline, "printindex", 10 }, /* TEXICMD_PRINTINDEX */
1.1 kristaps 311: { dobracket, "ref", 3 }, /* TEXICMD_REF */
312: { doliteral, "samp", 4 }, /* TEXICMD_SAMP */
1.7 kristaps 313: { dobracket, "sc", 2 }, /* TEXICMD_SC */
1.1 kristaps 314: { dosection, "section", 7 }, /* TEXICMD_SECTION */
1.3 kristaps 315: { doignline, "set", 3 }, /* TEXICMD_SET */
1.1 kristaps 316: { doignline, "setchapternewpage", 17 }, /* TEXICMD_SETCHAPNEWPAGE */
317: { doignline, "setfilename", 11 }, /* TEXICMD_SETFILENAME */
1.5 kristaps 318: { doignline, "settitle", 8 }, /* TEXICMD_SETTITLE */
1.3 kristaps 319: { dosp, "sp", 2 }, /* TEXICMD_SP */
320: { dosymbol, " ", 1 }, /* TEXICMD_SPACE */
321: { doexample, "smallexample", 12 }, /* TEXICMD_SMALLEXAMPLE */
322: { dosymbol, "{", 1 }, /* TEXICMD_SQUIGGLE_LEFT */
323: { dosymbol, "}", 1 }, /* TEXICMD_SQUIGGLE_RIGHT */
1.8 kristaps 324: { doemph, "strong", 6 }, /* TEXICMD_STRONG */
1.1 kristaps 325: { dosubsection, "subsection", 10 }, /* TEXICMD_SUBSECTION */
1.3 kristaps 326: { doignline, "subtitle", 8 }, /* TEXICMD_SUBTITLE */
327: { dosymbol, "\t", 1 }, /* TEXICMD_TAB */
1.1 kristaps 328: { dotable, "table", 5 }, /* TEXICMD_TABLE */
329: { doignblock, "tex", 3 }, /* TEXICMD_TEX */
330: { dosymbol, "TeX", 3 }, /* TEXICMD_TEXSYM */
1.3 kristaps 331: { doignline, "title", 5 }, /* TEXICMD_TITLE */
1.1 kristaps 332: { dobracket, "titlefont", 9 }, /* TEXICMD_TITLEFONT */
333: { doignblock, "titlepage", 9 }, /* TEXICMD_TITLEPAGE */
334: { dotop, "top", 3 }, /* TEXICMD_TOP */
1.3 kristaps 335: { dochapter, "unnumbered", 10 }, /* TEXICMD_UNNUMBERED */
1.2 kristaps 336: { dosection, "unnumberedsec", 13 }, /* TEXICMD_UNNUMBEREDSEC */
1.8 kristaps 337: { dolink, "uref", 4 }, /* TEXICMD_UREF */
338: { dolink, "url", 3 }, /* TEXICMD_URL */
1.1 kristaps 339: { doliteral, "var", 3 }, /* TEXICMD_VAR */
1.9 ! kristaps 340: { dosp, "vskip", 5 }, /* TEXICMD_VSKIP */
1.3 kristaps 341: { dobracket, "w", 1 }, /* TEXICMD_W */
1.8 kristaps 342: { dolink, "xref", 4 }, /* TEXICMD_XREF */
1.1 kristaps 343: };
344:
1.2 kristaps 345: /*
346: * Unmap the top-most file that we're using.
347: */
1.1 kristaps 348: static void
349: texifilepop(struct texi *p)
350: {
351: struct texifile *f;
352:
353: assert(p->filepos > 0);
354: f = &p->files[--p->filepos];
355: munmap(f->map, f->mapsz);
356: }
357:
1.2 kristaps 358: /*
1.8 kristaps 359: * Unmap all files that we're currently using and free all resources
360: * that we've allocated during the parse.
1.2 kristaps 361: * The utility should exit(...) after this is called.
362: */
1.1 kristaps 363: static void
364: texiexit(struct texi *p)
365: {
1.5 kristaps 366: size_t i;
367:
368: if (p->outcol)
369: putchar('\n');
1.1 kristaps 370:
371: while (p->filepos > 0)
372: texifilepop(p);
1.5 kristaps 373:
374: for (i = 0; i < p->dirsz; i++)
375: free(p->dirs[i]);
376: free(p->dirs);
1.1 kristaps 377: }
378:
/*
 * Fatal system error.
 * Reports "errstring" together with the current errno by way of
 * perror(3), releases all resources and exits with a failure status.
 */
static void
texiabort(struct texi *p, const char *errstring)
{

	/* Report first: the cleanup below might change errno. */
	perror(errstring);
	texiexit(p);
	exit(EXIT_FAILURE);
}
391:
392: /*
393: * Print a generic warning message (to stderr) tied to our current
394: * location in the parse sequence.
395: */
396: static void
397: texiwarn(const struct texi *p, const char *fmt, ...)
398: {
399: va_list ap;
400:
1.2 kristaps 401: fprintf(stderr, "%s:%zu:%zu: warning: ",
1.1 kristaps 402: p->files[p->filepos - 1].name,
403: p->files[p->filepos - 1].line + 1,
404: p->files[p->filepos - 1].col + 1);
405: va_start(ap, fmt);
406: vfprintf(stderr, fmt, ap);
407: va_end(ap);
408: fputc('\n', stderr);
409: }
410:
1.8 kristaps 411: /*
412: * Print an error message (to stderr) tied to our current location in
413: * the parse sequence, invoke texiexit(), then die.
414: */
1.2 kristaps 415: static void
416: texierr(struct texi *p, const char *fmt, ...)
417: {
418: va_list ap;
419:
420: fprintf(stderr, "%s:%zu:%zu: error: ",
421: p->files[p->filepos - 1].name,
422: p->files[p->filepos - 1].line + 1,
423: p->files[p->filepos - 1].col + 1);
424: va_start(ap, fmt);
425: vfprintf(stderr, fmt, ap);
426: va_end(ap);
427: fputc('\n', stderr);
428: texiexit(p);
429: exit(EXIT_FAILURE);
430: }
431:
1.1 kristaps 432: /*
1.8 kristaps 433: * Put a single data character to the output if we're not ignoring.
434: * Adjusts our output status.
1.1 kristaps 435: */
436: static void
437: texiputchar(struct texi *p, char c)
438: {
439:
1.3 kristaps 440: if (p->ign)
1.1 kristaps 441: return;
442: putchar(c);
443: if ('\n' == c) {
444: p->outcol = 0;
445: p->seenws = 0;
446: } else
447: p->outcol++;
448: }
449:
/*
 * Write a NUL-terminated string, one character at a time, by way of
 * texiputchar().
 */
static void
texiputchars(struct texi *p, const char *s)
{
	const char	*cp;

	for (cp = s; *cp != '\0'; cp++)
		texiputchar(p, *cp);
}
460:
461: /*
1.8 kristaps 462: * Close an mdoc(7) macro opened with teximacroopen().
463: * If there are no more macros on the line, prints a newline.
1.1 kristaps 464: */
465: static void
1.3 kristaps 466: teximacroclose(struct texi *p)
467: {
468:
1.8 kristaps 469: /* FIXME: punctuation. */
1.5 kristaps 470: if (0 == --p->outmacro)
471: texiputchar(p, '\n');
1.3 kristaps 472: }
473:
474: /*
1.8 kristaps 475: * Open a mdoc(7) macro.
476: * This is used for line macros, e.g., Qq [foo bar baz].
477: * It can be invoked for nested macros, e.g., Qq Li foo .
1.3 kristaps 478: */
479: static void
480: teximacroopen(struct texi *p, const char *s)
1.1 kristaps 481: {
482:
1.5 kristaps 483: if (p->outcol && 0 == p->outmacro)
484: texiputchar(p, '\n');
485: if (0 == p->outmacro)
486: texiputchar(p, '.');
487: else
488: texiputchar(p, ' ');
489: texiputchars(p, s);
490: texiputchar(p, ' ');
1.3 kristaps 491: p->outmacro++;
1.5 kristaps 492: p->seenws = 0;
1.1 kristaps 493: }
494:
495: /*
1.8 kristaps 496: * Put a stadnalone mdoc(7) command with the trailing newline.
1.1 kristaps 497: */
498: static void
499: teximacro(struct texi *p, const char *s)
500: {
501:
1.4 kristaps 502: if (p->outmacro)
503: texierr(p, "\"%s\" in open line scope!?", s);
504: else if (p->literal)
505: texierr(p, "\"%s\" in a literal scope!?", s);
506:
1.1 kristaps 507: if (p->outcol)
508: texiputchar(p, '\n');
1.5 kristaps 509:
510: texiputchar(p, '.');
511: texiputchars(p, s);
512: texiputchar(p, '\n');
1.1 kristaps 513: }
514:
515: /*
516: * Advance by a single byte in the input stream.
517: */
518: static void
519: advance(struct texi *p, const char *buf, size_t *pos)
520: {
521:
522: if ('\n' == buf[*pos]) {
523: p->files[p->filepos - 1].line++;
524: p->files[p->filepos - 1].col = 0;
525: } else
526: p->files[p->filepos - 1].col++;
527:
528: (*pos)++;
529: }
530:
531: /*
532: * Advance to the next non-whitespace word in the input stream.
533: * If we're in literal mode, then print all of the whitespace as we're
534: * doing so.
535: */
536: static size_t
537: advancenext(struct texi *p, const char *buf, size_t sz, size_t *pos)
538: {
539:
1.3 kristaps 540: if (p->literal) {
1.9 ! kristaps 541: while (*pos < sz && ismspace(buf[*pos])) {
1.5 kristaps 542: if (*pos && '\n' == buf[*pos] &&
543: '\\' == buf[*pos - 1])
544: texiputchar(p, 'e');
1.1 kristaps 545: texiputchar(p, buf[*pos]);
546: advance(p, buf, pos);
547: }
548: return(*pos);
549: }
550:
1.9 ! kristaps 551: while (*pos < sz && ismspace(buf[*pos])) {
1.1 kristaps 552: p->seenws = 1;
553: /*
554: * If it looks like we've printed a double-line, then
555: * output a paragraph.
556: * FIXME: this is stupid.
557: */
1.5 kristaps 558: if (*pos && '\n' == buf[*pos] &&
559: '\n' == buf[*pos - 1])
560: teximacro(p, "Pp");
1.1 kristaps 561: advance(p, buf, pos);
562: }
563: return(*pos);
564: }
565:
/*
 * Skip to the end of the current input line and return the new
 * position.
 * If "consumenl" is non-zero, the newline itself is skipped as well.
 */
static size_t
advanceeoln(struct texi *p, const char *buf,
	size_t sz, size_t *pos, int consumenl)
{

	/* FIXME: disregards @NEWLINE. */
	for ( ; *pos < sz; advance(p, buf, pos))
		if (buf[*pos] == '\n')
			break;

	if (consumenl && *pos < sz)
		advance(p, buf, pos);

	return *pos;
}
581:
/*
 * Move forward to the absolute buffer position "end", which must not
 * lie before the current position.
 */
static void
advanceto(struct texi *p, const char *buf, size_t *pos, size_t end)
{

	assert(*pos <= end);
	for ( ; *pos < end; advance(p, buf, pos))
		continue;
}
594:
595: /*
596: * Output a free-form word in the input stream, progressing to the next
597: * command or white-space.
598: * This also will advance the input stream.
599: */
600: static void
1.8 kristaps 601: texiword(struct texi *p, const char *buf,
602: size_t sz, size_t *pos, char extra)
1.1 kristaps 603: {
604:
1.3 kristaps 605: if (0 == p->outmacro && p->outcol > 72 && 0 == p->literal)
1.1 kristaps 606: texiputchar(p, '\n');
1.8 kristaps 607: /* FIXME: abstract this: we use it elsewhere. */
1.3 kristaps 608: if (p->seenws && p->outcol && 0 == p->literal)
1.1 kristaps 609: texiputchar(p, ' ');
610:
611: p->seenws = 0;
612:
1.9 ! kristaps 613: while (*pos < sz && ! ismspace(buf[*pos])) {
1.1 kristaps 614: switch (buf[*pos]) {
615: case ('@'):
616: case ('}'):
617: case ('{'):
618: return;
619: }
1.8 kristaps 620: if ('\0' != extra && buf[*pos] == extra)
621: return;
1.1 kristaps 622: if (*pos < sz - 1 &&
623: '`' == buf[*pos] &&
624: '`' == buf[*pos + 1]) {
625: texiputchars(p, "\\(lq");
626: advance(p, buf, pos);
627: } else if (*pos < sz - 1 &&
628: '\'' == buf[*pos] &&
629: '\'' == buf[*pos + 1]) {
630: texiputchars(p, "\\(rq");
631: advance(p, buf, pos);
632: } else
633: texiputchar(p, buf[*pos]);
634: advance(p, buf, pos);
635: }
636: }
637:
1.8 kristaps 638: /*
639: * Look up the command at position "pos" in the buffer, returning it (or
640: * TEXICMD__MAX if none found) and setting "end" to be the absolute
641: * index after the command name.
642: */
1.1 kristaps 643: static enum texicmd
644: texicmd(struct texi *p, const char *buf,
645: size_t pos, size_t sz, size_t *end)
646: {
647: size_t i, len;
648:
649: assert('@' == buf[pos]);
1.3 kristaps 650:
1.9 ! kristaps 651: if ((*end = pos) == sz)
! 652: return(TEXICMD__MAX);
! 653: else if ((*end = ++pos) == sz)
1.3 kristaps 654: return(TEXICMD__MAX);
655:
656: /* Alphabetic commands are special. */
657: if ( ! isalpha(buf[pos])) {
1.9 ! kristaps 658: if ((*end = pos + 1) == sz)
! 659: return(TEXICMD__MAX);
1.3 kristaps 660: for (i = 0; i < TEXICMD__MAX; i++) {
661: if (1 != texitoks[i].len)
662: continue;
663: if (0 == strncmp(texitoks[i].tok, &buf[pos], 1))
664: return(i);
665: }
666: texiwarn(p, "bad command: @%c", buf[pos]);
667: return(TEXICMD__MAX);
668: }
669:
1.9 ! kristaps 670: for (*end = pos; *end < sz && ! ismspace(buf[*end]); (*end)++)
1.3 kristaps 671: if ((*end > pos && ('@' == buf[*end] ||
672: '{' == buf[*end] || '}' == buf[*end])))
1.1 kristaps 673: break;
674:
675: len = *end - pos;
676: for (i = 0; i < TEXICMD__MAX; i++) {
677: if (len != texitoks[i].len)
678: continue;
679: if (0 == strncmp(texitoks[i].tok, &buf[pos], len))
680: return(i);
681: }
682:
1.3 kristaps 683: texiwarn(p, "bad command: @%.*s", (int)len, &buf[pos]);
1.1 kristaps 684: return(TEXICMD__MAX);
685: }
686:
1.8 kristaps 687: /*
688: * Parse an argument from a bracketed command, e.g., @url{foo, baz}.
689: * Num should be set to the argument we're currently parsing, although
690: * it suffixes for it to be zero or non-zero.
691: * This will return 1 if there are more arguments, 0 otherwise.
692: * This will stop (returning 0) in the event of EOF or if we're not at a
693: * bracket for the zeroth parse.
694: */
695: static int
696: parsearg(struct texi *p, const char *buf,
697: size_t sz, size_t *pos, size_t num)
698: {
699: size_t end;
700: enum texicmd cmd;
701:
1.9 ! kristaps 702: while (*pos < sz && ismspace(buf[*pos]))
1.8 kristaps 703: advance(p, buf, pos);
704: if (*pos == sz || (0 == num && '{' != buf[*pos]))
705: return(0);
706: if (0 == num)
707: advance(p, buf, pos);
708:
709: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
710: switch (buf[*pos]) {
711: case (','):
712: advance(p, buf, pos);
713: return(1);
714: case ('}'):
715: advance(p, buf, pos);
716: return(0);
717: case ('{'):
718: if (0 == p->ign)
719: texiwarn(p, "unexpected \"{\"");
720: advance(p, buf, pos);
721: continue;
722: case ('@'):
723: break;
724: default:
725: texiword(p, buf, sz, pos, ',');
726: continue;
727: }
728:
729: cmd = texicmd(p, buf, *pos, sz, &end);
730: advanceto(p, buf, pos, end);
731: if (TEXICMD__MAX == cmd)
732: continue;
733: if (NULL != texitoks[cmd].fp)
734: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
735: }
736: return(0);
737: }
738:
739: /*
740: * Parse until the end of a bracketed statement, e.g., @foo{bar baz}.
741: * This will stop in the event of EOF or if we're not at a bracket.
742: */
1.1 kristaps 743: static void
744: parsebracket(struct texi *p, const char *buf, size_t sz, size_t *pos)
745: {
746: size_t end;
747: enum texicmd cmd;
748:
1.9 ! kristaps 749: while (*pos < sz && ismspace(buf[*pos]))
1.3 kristaps 750: advance(p, buf, pos);
751:
1.1 kristaps 752: if (*pos == sz || '{' != buf[*pos])
753: return;
754: advance(p, buf, pos);
755:
756: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
757: switch (buf[*pos]) {
758: case ('}'):
759: advance(p, buf, pos);
760: return;
761: case ('{'):
1.3 kristaps 762: if (0 == p->ign)
763: texiwarn(p, "unexpected \"{\"");
764: advance(p, buf, pos);
765: continue;
766: case ('@'):
767: break;
768: default:
1.8 kristaps 769: texiword(p, buf, sz, pos, '\0');
1.3 kristaps 770: continue;
771: }
772:
773: cmd = texicmd(p, buf, *pos, sz, &end);
774: advanceto(p, buf, pos, end);
775: if (TEXICMD__MAX == cmd)
776: continue;
777: if (NULL != texitoks[cmd].fp)
778: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
779: }
780: }
781:
782: /*
783: * This should be invoked when we're on a macro line and want to process
784: * to the end of the current input line, doing all of our macros along
785: * the way.
786: */
787: static void
788: parseeoln(struct texi *p, const char *buf, size_t sz, size_t *pos)
789: {
790: size_t end;
791: enum texicmd cmd;
792:
793: assert(0 == p->literal);
794:
795: while (*pos < sz && '\n' != buf[*pos]) {
796: while (*pos < sz && isws(buf[*pos])) {
797: p->seenws = 1;
798: advance(p, buf, pos);
799: }
800: switch (buf[*pos]) {
801: case ('}'):
802: if (0 == p->ign)
803: texiwarn(p, "unexpected \"}\"");
804: advance(p, buf, pos);
805: continue;
806: case ('{'):
807: if (0 == p->ign)
808: texiwarn(p, "unexpected \"{\"");
1.1 kristaps 809: advance(p, buf, pos);
810: continue;
811: case ('@'):
812: break;
813: default:
1.8 kristaps 814: texiword(p, buf, sz, pos, '\0');
1.1 kristaps 815: continue;
816: }
817:
818: cmd = texicmd(p, buf, *pos, sz, &end);
819: advanceto(p, buf, pos, end);
820: if (TEXICMD__MAX == cmd)
821: continue;
822: if (NULL != texitoks[cmd].fp)
823: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
824: }
825: }
826:
1.8 kristaps 827: /*
828: * Parse a single word or command.
829: * This will return immediately at the EOF.
830: */
1.1 kristaps 831: static void
1.3 kristaps 832: parsesingle(struct texi *p, const char *buf, size_t sz, size_t *pos)
833: {
834: size_t end;
835: enum texicmd cmd;
836:
837: if ((*pos = advancenext(p, buf, sz, pos)) >= sz)
838: return;
839:
840: switch (buf[*pos]) {
841: case ('}'):
842: if (0 == p->ign)
843: texiwarn(p, "unexpected \"}\"");
844: advance(p, buf, pos);
845: return;
846: case ('{'):
847: if (0 == p->ign)
848: texiwarn(p, "unexpected \"{\"");
849: advance(p, buf, pos);
850: return;
851: case ('@'):
852: break;
853: default:
1.8 kristaps 854: texiword(p, buf, sz, pos, '\0');
1.3 kristaps 855: return;
856: }
857:
858: cmd = texicmd(p, buf, *pos, sz, &end);
859: advanceto(p, buf, pos, end);
860: if (TEXICMD__MAX == cmd)
861: return;
862: if (NULL != texitoks[cmd].fp)
863: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
864: }
865:
/*
 * Parse a whole buffer, from its beginning until its end, one word or
 * command at a time.
 */
static void
parseeof(struct texi *p, const char *buf, size_t sz)
{
	size_t	 off;

	off = 0;
	while (off < sz)
		parsesingle(p, buf, sz, &off);
}
877:
1.8 kristaps 878: /*
879: * Parse a block sequence until we have the "@end endtoken" command
880: * invocation.
881: * This will return immediately at EOF.
882: */
1.7 kristaps 883: static void
1.1 kristaps 884: parseto(struct texi *p, const char *buf,
885: size_t sz, size_t *pos, const char *endtoken)
886: {
887: size_t end;
888: enum texicmd cmd;
889: size_t endtoksz;
890:
891: endtoksz = strlen(endtoken);
892: assert(endtoksz > 0);
893:
894: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
895: switch (buf[*pos]) {
896: case ('}'):
1.3 kristaps 897: if (0 == p->ign)
898: texiwarn(p, "unexpected \"}\"");
1.1 kristaps 899: advance(p, buf, pos);
900: continue;
901: case ('{'):
1.3 kristaps 902: if (0 == p->ign)
903: texiwarn(p, "unexpected \"{\"");
1.1 kristaps 904: advance(p, buf, pos);
905: continue;
906: case ('@'):
907: break;
908: default:
1.8 kristaps 909: texiword(p, buf, sz, pos, '\0');
1.1 kristaps 910: continue;
911: }
912:
913: cmd = texicmd(p, buf, *pos, sz, &end);
914: advanceto(p, buf, pos, end);
915: if (TEXICMD_END == cmd) {
1.2 kristaps 916: while (*pos < sz && isws(buf[*pos]))
1.1 kristaps 917: advance(p, buf, pos);
918: /*
1.8 kristaps 919: * FIXME: check the full word, not just its
920: * initial substring!
1.1 kristaps 921: */
922: if (sz - *pos >= endtoksz && 0 == strncmp
923: (&buf[*pos], endtoken, endtoksz)) {
1.3 kristaps 924: advanceeoln(p, buf, sz, pos, 0);
1.1 kristaps 925: break;
926: }
1.3 kristaps 927: if (0 == p->ign)
928: texiwarn(p, "unexpected \"end\"");
929: advanceeoln(p, buf, sz, pos, 0);
1.1 kristaps 930: continue;
931: } else if (TEXICMD__MAX != cmd)
932: if (NULL != texitoks[cmd].fp)
933: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
934: }
935: }
936:
1.8 kristaps 937: /*
938: * Memory-map the file "fname" and begin parsing it.
939: * This can be called in a nested context.
940: */
1.1 kristaps 941: static void
1.2 kristaps 942: parsefile(struct texi *p, const char *fname)
943: {
944: struct texifile *f;
945: int fd;
946: struct stat st;
947:
948: assert(p->filepos < 64);
949: f = &p->files[p->filepos];
950: memset(f, 0, sizeof(struct texifile));
951:
952: f->name = fname;
953: if (-1 == (fd = open(fname, O_RDONLY, 0))) {
954: texiabort(p, fname);
955: } else if (-1 == fstat(fd, &st)) {
956: close(fd);
957: texiabort(p, fname);
958: }
959:
960: f->mapsz = st.st_size;
961: f->map = mmap(NULL, f->mapsz,
962: PROT_READ, MAP_SHARED, fd, 0);
963: close(fd);
964:
965: if (MAP_FAILED == f->map)
966: texiabort(p, fname);
967:
968: p->filepos++;
969: parseeof(p, f->map, f->mapsz);
970: texifilepop(p);
971: }
972:
973: static void
1.3 kristaps 974: dodeftypevar(struct texi *p, enum texicmd cmd,
975: const char *buf, size_t sz, size_t *pos)
976: {
977: const char *blk;
978:
979: blk = TEXICMD_DEFTYPEVR == cmd ?
980: "deftypevr" : "deftypevar";
981:
982: if (p->ign) {
983: parseto(p, buf, sz, pos, blk);
984: return;
985: }
986:
1.5 kristaps 987: teximacro(p, "Pp");
1.3 kristaps 988: if (TEXICMD_DEFTYPEVR == cmd) {
989: parsebracket(p, buf, sz, pos);
990: texiputchars(p, ":\n");
991: }
1.5 kristaps 992: teximacroopen(p, "Vt");
1.4 kristaps 993: parseeoln(p, buf, sz, pos);
1.3 kristaps 994: teximacroclose(p);
1.5 kristaps 995: teximacro(p, "Pp");
1.3 kristaps 996: parseto(p, buf, sz, pos, blk);
997: }
998:
999: static void
1000: dodeftypefun(struct texi *p, enum texicmd cmd,
1001: const char *buf, size_t sz, size_t *pos)
1002: {
1003: const char *blk;
1004:
1.5 kristaps 1005: blk = NULL;
1.3 kristaps 1006: switch (cmd) {
1007: case (TEXICMD_DEFTYPEFN):
1008: case (TEXICMD_DEFTYPEFUN):
1.5 kristaps 1009: blk = texitoks[cmd].tok;
1.3 kristaps 1010: break;
1.5 kristaps 1011: default:
1.3 kristaps 1012: break;
1013: }
1014:
1015: if (p->ign) {
1016: if (NULL != blk)
1017: parseto(p, buf, sz, pos, blk);
1018: return;
1019: }
1020:
1021: switch (cmd) {
1022: case (TEXICMD_DEFTYPEFN):
1023: case (TEXICMD_DEFTYPEFUN):
1.5 kristaps 1024: teximacro(p, "Pp");
1.3 kristaps 1025: break;
1026: default:
1027: break;
1028: }
1029: if (TEXICMD_DEFTYPEFN == cmd ||
1030: TEXICMD_DEFTYPEFNX == cmd) {
1031: parsebracket(p, buf, sz, pos);
1032: texiputchars(p, ":\n");
1033: }
1.5 kristaps 1034: teximacroopen(p, "Ft");
1.3 kristaps 1035: parsesingle(p, buf, sz, pos);
1036: teximacroclose(p);
1.5 kristaps 1037: teximacroopen(p, "Fn");
1.3 kristaps 1038: parsesingle(p, buf, sz, pos);
1039: teximacroclose(p);
1.5 kristaps 1040: teximacroopen(p, "Li");
1.4 kristaps 1041: parseeoln(p, buf, sz, pos);
1.3 kristaps 1042: teximacroclose(p);
1.5 kristaps 1043: teximacro(p, "Pp");
1.3 kristaps 1044: if (NULL != blk)
1045: parseto(p, buf, sz, pos, blk);
1046: }
1047:
1048: static void
1.1 kristaps 1049: doignblock(struct texi *p, enum texicmd cmd,
1050: const char *buf, size_t sz, size_t *pos)
1051: {
1052:
1.3 kristaps 1053: p->ign++;
1.5 kristaps 1054: parseto(p, buf, sz, pos, texitoks[cmd].tok);
1.3 kristaps 1055: p->ign--;
1.1 kristaps 1056: }
1057:
1058: static void
1.3 kristaps 1059: doblock(struct texi *p, enum texicmd cmd,
1.1 kristaps 1060: const char *buf, size_t sz, size_t *pos)
1061: {
1062:
1.5 kristaps 1063: parseto(p, buf, sz, pos, texitoks[cmd].tok);
1.1 kristaps 1064: }
1065:
1066: static void
1067: doinline(struct texi *p, const char *buf,
1068: size_t sz, size_t *pos, const char *macro)
1069: {
1070:
1.5 kristaps 1071: teximacroopen(p, macro);
1.1 kristaps 1072: p->seenws = 0;
1073: parsebracket(p, buf, sz, pos);
1074: if (*pos < sz - 1 &&
1075: ismpunct(buf[*pos]) &&
1.9 ! kristaps 1076: ismspace(buf[*pos + 1])) {
1.1 kristaps 1077: texiputchar(p, ' ');
1078: texiputchar(p, buf[*pos]);
1079: advance(p, buf, pos);
1080: }
1.5 kristaps 1081: teximacroclose(p);
1.1 kristaps 1082: }
1083:
1084: static void
1.2 kristaps 1085: doinclude(struct texi *p, enum texicmd cmd,
1086: const char *buf, size_t sz, size_t *pos)
1087: {
1088: char fname[PATH_MAX], path[PATH_MAX];
1089: size_t i;
1090: int rc;
1091:
1092: while (*pos < sz && ' ' == buf[*pos])
1093: advance(p, buf, pos);
1094:
1095: /* Read in the filename. */
1096: for (i = 0; *pos < sz && '\n' != buf[*pos]; i++) {
1097: if (i == sizeof(fname) - 1)
1098: break;
1099: fname[i] = buf[*pos];
1100: advance(p, buf, pos);
1101: }
1102:
1103: if (i == 0)
1104: texierr(p, "path too short");
1105: else if ('\n' != buf[*pos])
1106: texierr(p, "path too long");
1107: else if ('/' == fname[0])
1108: texierr(p, "no absolute paths");
1109: fname[i] = '\0';
1110:
1111: if (strstr(fname, "../") || strstr(fname, "/.."))
1112: texierr(p, "insecure path");
1113:
1.5 kristaps 1114: for (i = 0; i < p->dirsz; i++) {
1115: rc = snprintf(path, sizeof(path),
1116: "%s/%s", p->dirs[i], fname);
1117: if (rc < 0)
1118: texierr(p, "couldn't format path");
1119: else if ((size_t)rc >= sizeof(path))
1120: texierr(p, "path too long");
1121: else if (-1 == access(path, R_OK))
1122: continue;
1123:
1124: parsefile(p, path);
1125: return;
1126: }
1.2 kristaps 1127:
1.5 kristaps 1128: texierr(p, "couldn't find %s in includes", fname);
1.2 kristaps 1129: }
1130:
1131: static void
1.1 kristaps 1132: doitalic(struct texi *p, enum texicmd cmd,
1133: const char *buf, size_t sz, size_t *pos)
1134: {
1135:
1136: texiputchars(p, "\\fI");
1137: parsebracket(p, buf, sz, pos);
1138: texiputchars(p, "\\fP");
1139: }
1140:
1141: static void
1.3 kristaps 1142: doenv(struct texi *p, enum texicmd cmd,
1143: const char *buf, size_t sz, size_t *pos)
1144: {
1145:
1146: if (p->literal)
1147: parsebracket(p, buf, sz, pos);
1148: else
1149: doinline(p, buf, sz, pos, "Ev");
1150: }
1151:
1152: static void
1.1 kristaps 1153: doliteral(struct texi *p, enum texicmd cmd,
1154: const char *buf, size_t sz, size_t *pos)
1155: {
1156:
1.3 kristaps 1157: if (p->literal)
1.1 kristaps 1158: parsebracket(p, buf, sz, pos);
1159: else
1160: doinline(p, buf, sz, pos, "Li");
1161: }
1162:
1163: static void
1164: doemph(struct texi *p, enum texicmd cmd,
1165: const char *buf, size_t sz, size_t *pos)
1166: {
1167:
1.3 kristaps 1168: if (p->literal)
1.1 kristaps 1169: doitalic(p, cmd, buf, sz, pos);
1170: else
1171: doinline(p, buf, sz, pos, "Em");
1172: }
1173:
1174: static void
1175: docommand(struct texi *p, enum texicmd cmd,
1176: const char *buf, size_t sz, size_t *pos)
1177: {
1178:
1179: doinline(p, buf, sz, pos, "Xr");
1180: }
1181:
1182: static void
1183: dobracket(struct texi *p, enum texicmd cmd,
1184: const char *buf, size_t sz, size_t *pos)
1185: {
1186:
1187: parsebracket(p, buf, sz, pos);
1188: }
1189:
1190: static void
1191: dofile(struct texi *p, enum texicmd cmd,
1192: const char *buf, size_t sz, size_t *pos)
1193: {
1194:
1.3 kristaps 1195: if (p->literal)
1.1 kristaps 1196: parsebracket(p, buf, sz, pos);
1197: else
1198: doinline(p, buf, sz, pos, "Pa");
1199: }
1200:
1201: static void
1.3 kristaps 1202: dodisplay(struct texi *p, enum texicmd cmd,
1203: const char *buf, size_t sz, size_t *pos)
1204: {
1205:
1.5 kristaps 1206: teximacro(p, "Bd -display -offset indent");
1.3 kristaps 1207: advanceeoln(p, buf, sz, pos, 1);
1208: parseto(p, buf, sz, pos, "display");
1.5 kristaps 1209: teximacro(p, "Ed");
1.3 kristaps 1210: }
1211:
1212: static void
1.1 kristaps 1213: doexample(struct texi *p, enum texicmd cmd,
1214: const char *buf, size_t sz, size_t *pos)
1215: {
1.3 kristaps 1216: const char *blk;
1217:
1218: blk = TEXICMD_EXAMPLE == cmd ? "example" : "smallexample";
1.1 kristaps 1219:
1.5 kristaps 1220: teximacro(p, "Bd -literal -offset indent");
1.3 kristaps 1221: advanceeoln(p, buf, sz, pos, 1);
1222: p->literal++;
1223: parseto(p, buf, sz, pos, blk);
1224: p->literal--;
1.5 kristaps 1225: teximacro(p, "Ed");
1.1 kristaps 1226: }
1227:
1228: static void
1229: dobye(struct texi *p, enum texicmd cmd,
1230: const char *buf, size_t sz, size_t *pos)
1231: {
1232:
1233: texiexit(p);
1234: exit(EXIT_SUCCESS);
1235: }
1236:
1237: static void
1238: dosymbol(struct texi *p, enum texicmd cmd,
1239: const char *buf, size_t sz, size_t *pos)
1240: {
1241:
1.3 kristaps 1242: if (p->seenws && p->outcol && 0 == p->literal) {
1243: texiputchar(p, ' ');
1244: p->seenws = 0;
1245: }
1246:
1.1 kristaps 1247: switch (cmd) {
1.3 kristaps 1248: case (TEXICMD_ASTERISK):
1249: case (TEXICMD_NEWLINE):
1250: case (TEXICMD_SPACE):
1251: case (TEXICMD_TAB):
1252: texiputchar(p, ' ');
1253: break;
1.1 kristaps 1254: case (TEXICMD_AT):
1.3 kristaps 1255: texiputchar(p, '@');
1256: break;
1257: case (TEXICMD_BANG):
1258: texiputchar(p, '!');
1.7 kristaps 1259: break;
1260: case (TEXICMD_BULLET):
1261: texiputchars(p, "\\(bu");
1.1 kristaps 1262: break;
1263: case (TEXICMD_COPYRIGHT):
1264: texiputchars(p, "\\(co");
1265: break;
1.2 kristaps 1266: case (TEXICMD_DOTS):
1267: texiputchars(p, "...");
1268: break;
1.1 kristaps 1269: case (TEXICMD_LATEX):
1270: texiputchars(p, "LaTeX");
1271: break;
1.3 kristaps 1272: case (TEXICMD_QUESTIONMARK):
1273: texiputchar(p, '?');
1274: break;
1275: case (TEXICMD_SQUIGGLE_LEFT):
1276: texiputchars(p, "{");
1277: break;
1278: case (TEXICMD_SQUIGGLE_RIGHT):
1279: texiputchars(p, "}");
1280: break;
1.1 kristaps 1281: case (TEXICMD_TEXSYM):
1282: texiputchars(p, "TeX");
1283: break;
1.3 kristaps 1284: case (TEXICMD_COLON):
1285: case (TEXICMD_HYPHEN):
1286: break;
1.1 kristaps 1287: default:
1.5 kristaps 1288: texiwarn(p, "sym: %d", cmd);
1.1 kristaps 1289: abort();
1290: }
1291:
1.5 kristaps 1292: if (texitoks[cmd].len > 1)
1293: doignbracket(p, cmd, buf, sz, pos);
1.1 kristaps 1294: }
1295:
1296: static void
1297: doquotation(struct texi *p, enum texicmd cmd,
1298: const char *buf, size_t sz, size_t *pos)
1299: {
1300:
1.5 kristaps 1301: teximacro(p, "Qo");
1.1 kristaps 1302: parseto(p, buf, sz, pos, "quotation");
1.5 kristaps 1303: teximacro(p, "Qc");
1.1 kristaps 1304: }
1305:
1.3 kristaps 1306: static void
1307: domath(struct texi *p, enum texicmd cmd,
1308: const char *buf, size_t sz, size_t *pos)
1309: {
1310: size_t nest;
1311:
1312: /*
1313: * Math handling is different from everything else.
1314: * We don't allow any subcomponents, and we ignore the rules in
1315: * terms of @-commands.
1316: * This departs from GNU's rules, but whatever.
1317: */
1318: while (*pos < sz && isws(buf[*pos]))
1319: advance(p, buf, pos);
1320: if (*pos == sz || '{' != buf[*pos])
1321: return;
1322: advance(p, buf, pos);
1323: if (p->seenws && p->outcol && 0 == p->literal)
1324: texiputchar(p, ' ');
1325: p->seenws = 0;
1326: for (nest = 1; *pos < sz && nest > 0; ) {
1327: if ('{' == buf[*pos])
1328: nest++;
1329: else if ('}' == buf[*pos])
1330: if (0 == --nest)
1331: continue;
1332: texiputchar(p, buf[*pos]);
1333: advance(p, buf, pos);
1334: }
1335: if (*pos == sz)
1336: return;
1337: assert('}' == buf[*pos]);
1338: advance(p, buf, pos);
1339: }
1340:
1.1 kristaps 1341: static void
1.8 kristaps 1342: dolink(struct texi *p, enum texicmd cmd,
1.1 kristaps 1343: const char *buf, size_t sz, size_t *pos)
1344: {
1.8 kristaps 1345: int c;
1.1 kristaps 1346:
1347: switch (cmd) {
1348: case (TEXICMD_EMAIL):
1.5 kristaps 1349: teximacroopen(p, "Mt");
1.1 kristaps 1350: break;
1.3 kristaps 1351: case (TEXICMD_UREF):
1.1 kristaps 1352: case (TEXICMD_URL):
1.5 kristaps 1353: teximacroopen(p, "Lk");
1.1 kristaps 1354: break;
1.8 kristaps 1355: case (TEXICMD_XREF):
1356: texiputchars(p, "See Section");
1357: teximacroopen(p, "Qq");
1358: break;
1359: case (TEXICMD_PXREF):
1360: texiputchars(p, "see Section");
1361: teximacroopen(p, "Qq");
1362: break;
1.1 kristaps 1363: default:
1.8 kristaps 1364: abort();
1.1 kristaps 1365: }
1.8 kristaps 1366:
1367: c = parsearg(p, buf, sz, pos, 0);
1368: p->ign++;
1369: while (c > 0)
1370: c = parsearg(p, buf, sz, pos, 1);
1371: p->ign--;
1372:
1.1 kristaps 1373: if (*pos < sz - 1 &&
1374: ismpunct(buf[*pos]) &&
1.9 ! kristaps 1375: ismspace(buf[*pos + 1])) {
1.1 kristaps 1376: texiputchar(p, ' ');
1377: texiputchar(p, buf[*pos]);
1378: advance(p, buf, pos);
1379: }
1.8 kristaps 1380:
1381: teximacroclose(p);
1382: }
1383:
1384: static void
1385: doignargn(struct texi *p, enum texicmd cmd,
1386: const char *buf, size_t sz, size_t *pos)
1387: {
1388: int c;
1389:
1390: c = parsearg(p, buf, sz, pos, 0);
1391: p->ign++;
1392: while (c > 0)
1393: c = parsearg(p, buf, sz, pos, 1);
1394: p->ign--;
1.1 kristaps 1395: }
1396:
1397: static void
1398: dosubsection(struct texi *p, enum texicmd cmd,
1399: const char *buf, size_t sz, size_t *pos)
1400: {
1401:
1.5 kristaps 1402: teximacro(p, "Pp");
1403: teximacroopen(p, "Em");
1.3 kristaps 1404: parseeoln(p, buf, sz, pos);
1.5 kristaps 1405: teximacroclose(p);
1406: teximacro(p, "Pp");
1.1 kristaps 1407: }
1408:
1409: static void
1410: dosection(struct texi *p, enum texicmd cmd,
1411: const char *buf, size_t sz, size_t *pos)
1412: {
1413:
1.3 kristaps 1414: if (p->outmacro)
1415: texierr(p, "subsection in open line scope!?");
1416: else if (p->literal)
1417: texierr(p, "subsection in a literal scope!?");
1418:
1.5 kristaps 1419: teximacroopen(p, "Ss");
1.3 kristaps 1420: parseeoln(p, buf, sz, pos);
1421: teximacroclose(p);
1422: }
1423:
1424: static void
1425: dosp(struct texi *p, enum texicmd cmd,
1426: const char *buf, size_t sz, size_t *pos)
1427: {
1428:
1.5 kristaps 1429: teximacro(p, "Pp");
1.3 kristaps 1430: advanceeoln(p, buf, sz, pos, 1);
1.1 kristaps 1431: }
1432:
1433: static void
1.3 kristaps 1434: dochapter(struct texi *p, enum texicmd cmd,
1.1 kristaps 1435: const char *buf, size_t sz, size_t *pos)
1436: {
1437:
1.3 kristaps 1438: if (p->outmacro)
1439: texierr(p, "section in open line scope!?");
1440: else if (p->literal)
1441: texierr(p, "section in a literal scope!?");
1442:
1.5 kristaps 1443: teximacroopen(p, "Sh");
1.3 kristaps 1444: parseeoln(p, buf, sz, pos);
1445: teximacroclose(p);
1.1 kristaps 1446: }
1447:
1448: static void
1449: dotop(struct texi *p, enum texicmd cmd,
1450: const char *buf, size_t sz, size_t *pos)
1451: {
1452:
1.3 kristaps 1453: p->ign--;
1454: advanceeoln(p, buf, sz, pos, 1);
1.6 kristaps 1455: teximacro(p, "Dd $Mdocdate: February 18 2015 $");
1.5 kristaps 1456: teximacro(p, "Dt SOMETHING 7");
1457: teximacro(p, "Os");
1458: teximacro(p, "Sh NAME");
1459: teximacro(p, "Nm Something");
1460: teximacro(p, "Nd Something");
1.1 kristaps 1461: }
1462:
1463: static void
1464: doitem(struct texi *p, enum texicmd cmd,
1465: const char *buf, size_t sz, size_t *pos)
1466: {
1467:
1.3 kristaps 1468: if (p->outmacro)
1469: texierr(p, "item in open line scope!?");
1470: else if (p->literal)
1471: texierr(p, "item in a literal scope!?");
1472:
1473: switch (p->list) {
1474: case (TEXILIST_ITEM):
1.5 kristaps 1475: teximacroopen(p, "It");
1.3 kristaps 1476: break;
1477: case (TEXILIST_NOITEM):
1.5 kristaps 1478: teximacro(p, "It");
1.3 kristaps 1479: break;
1480: default:
1.5 kristaps 1481: teximacro(p, "Pp");
1.3 kristaps 1482: break;
1483: }
1484:
1485: parseeoln(p, buf, sz, pos);
1.1 kristaps 1486:
1.3 kristaps 1487: if (TEXILIST_ITEM == p->list)
1488: teximacroclose(p);
1.9 ! kristaps 1489: else if (p->outcol > 0)
1.1 kristaps 1490: texiputchar(p, '\n');
1491: }
1492:
1493: static void
1494: dotable(struct texi *p, enum texicmd cmd,
1495: const char *buf, size_t sz, size_t *pos)
1496: {
1.3 kristaps 1497: enum texilist sv = p->list;
1498:
1499: p->list = TEXILIST_ITEM;
1.5 kristaps 1500: teximacro(p, "Bl -tag -width Ds");
1.1 kristaps 1501: parseto(p, buf, sz, pos, "table");
1.5 kristaps 1502: teximacro(p, "El");
1.3 kristaps 1503: p->list = sv;
1.1 kristaps 1504: }
1505:
1506: static void
1.2 kristaps 1507: doenumerate(struct texi *p, enum texicmd cmd,
1508: const char *buf, size_t sz, size_t *pos)
1509: {
1.3 kristaps 1510: enum texilist sv = p->list;
1.2 kristaps 1511:
1.3 kristaps 1512: p->list = TEXILIST_NOITEM;
1.5 kristaps 1513: teximacro(p, "Bl -enum");
1.2 kristaps 1514: parseto(p, buf, sz, pos, "enumerate");
1.5 kristaps 1515: teximacro(p, "El");
1.3 kristaps 1516: p->list = sv;
1.2 kristaps 1517: }
1518:
1519: static void
1.1 kristaps 1520: doitemize(struct texi *p, enum texicmd cmd,
1521: const char *buf, size_t sz, size_t *pos)
1522: {
1.3 kristaps 1523: enum texilist sv = p->list;
1.1 kristaps 1524:
1.3 kristaps 1525: p->list = TEXILIST_ITEM;
1.5 kristaps 1526: teximacro(p, "Bl -bullet");
1.1 kristaps 1527: parseto(p, buf, sz, pos, "itemize");
1.5 kristaps 1528: teximacro(p, "El");
1.3 kristaps 1529: p->list = sv;
1.1 kristaps 1530: }
1531:
1532: static void
1533: doignbracket(struct texi *p, enum texicmd cmd,
1534: const char *buf, size_t sz, size_t *pos)
1535: {
1536:
1.3 kristaps 1537: p->ign++;
1.1 kristaps 1538: parsebracket(p, buf, sz, pos);
1.3 kristaps 1539: p->ign--;
1.1 kristaps 1540: }
1541:
1542: static void
1543: doignline(struct texi *p, enum texicmd cmd,
1544: const char *buf, size_t sz, size_t *pos)
1545: {
1546:
1.3 kristaps 1547: advanceeoln(p, buf, sz, pos, 1);
1.1 kristaps 1548: }
1549:
/*
 * Build the array of include directories.
 * The first element is always a copy of "base"; if "cp" is not NULL,
 * each of its colon-separated fields (empty ones included) follows in
 * order.
 * The number of elements is stored in "sz".
 * The array and every string in it are freshly allocated; the process
 * exits on allocation failure.
 * This does NOT sanitise the directories!
 */
static char **
parsedirs(const char *base, const char *cp, size_t *sz)
{
	char		*copy, *rest, *dir;
	const char	*scan;
	char		**dirs;
	size_t		 n;

	/* One slot for "base", then one per field: colons plus one. */
	*sz = 1;
	if (NULL != cp) {
		(*sz)++;
		for (scan = cp; '\0' != *scan; scan++)
			if (':' == *scan)
				(*sz)++;
	}

	if (NULL == (dirs = calloc(*sz, sizeof(char *)))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}
	if (NULL == (dirs[0] = strdup(base))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	if (NULL == cp)
		return(dirs);

	/* strsep() writes into its input, so split a private copy. */
	if (NULL == (copy = strdup(cp))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	rest = copy;
	n = 1;
	while (NULL != (dir = strsep(&rest, ":"))) {
		if (NULL == (dirs[n] = strdup(dir))) {
			perror(NULL);
			exit(EXIT_FAILURE);
		}
		n++;
	}

	free(copy);
	return(dirs);
}
1595:
1.1 kristaps 1596: int
1597: main(int argc, char *argv[])
1598: {
1599: struct texi texi;
1.2 kristaps 1600: int c;
1601: char *path, *dir;
1.5 kristaps 1602: const char *progname, *Idir;
1.1 kristaps 1603:
1604: progname = strrchr(argv[0], '/');
1605: if (progname == NULL)
1606: progname = argv[0];
1607: else
1608: ++progname;
1609:
1.5 kristaps 1610: Idir = NULL;
1611: while (-1 != (c = getopt(argc, argv, "I:")))
1.1 kristaps 1612: switch (c) {
1.5 kristaps 1613: case ('I'):
1614: Idir = optarg;
1615: break;
1.1 kristaps 1616: default:
1617: goto usage;
1618: }
1619:
1620: argv += optind;
1621: if (0 == (argc -= optind))
1622: goto usage;
1623:
1.2 kristaps 1624: if (NULL == (path = strdup(argv[0]))) {
1625: perror(NULL);
1626: exit(EXIT_FAILURE);
1627: } else if (NULL == (dir = dirname(path))) {
1628: perror(argv[0]);
1629: free(path);
1630: exit(EXIT_FAILURE);
1631: }
1632: free(path);
1633:
1.1 kristaps 1634: memset(&texi, 0, sizeof(struct texi));
1.3 kristaps 1635: texi.ign = 1;
1.5 kristaps 1636: texi.dirs = parsedirs(dir, Idir, &texi.dirsz);
1.2 kristaps 1637: parsefile(&texi, argv[0]);
1.5 kristaps 1638: /* We shouldn't get here. */
1.2 kristaps 1639: texiexit(&texi);
1640: return(EXIT_FAILURE);
1.1 kristaps 1641: usage:
1.8 kristaps 1642: fprintf(stderr, "usage: %s [-Idirs] file\n", progname);
1.1 kristaps 1643: return(EXIT_FAILURE);
1644: }
CVSweb