Annotation of texi2mdoc/main.c, Revision 1.10
1.10 ! kristaps 1: /* $Id: main.c,v 1.9 2015/02/18 14:52:45 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2015 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/mman.h>
18: #include <sys/stat.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <getopt.h>
1.2 kristaps 24: #include <libgen.h>
25: #include <limits.h>
1.1 kristaps 26: #include <stdarg.h>
27: #include <stdio.h>
28: #include <stdlib.h>
29: #include <string.h>
1.10 ! kristaps 30: #include <time.h>
1.6 kristaps 31: #include <unistd.h>
1.1 kristaps 32:
/*
 * Identifiers for every native Texinfo command that the parser knows.
 * Overridden (non-native) command names are not represented here.
 * The values are used as indices into the texitoks[] table, so the two
 * must list the commands in the same order.
 */
enum texicmd {
	TEXICMD_ACRONYM = 0,
	TEXICMD_A4PAPER,
	TEXICMD_ANCHOR,
	TEXICMD_APPENDIX,
	TEXICMD_APPENDIXSEC,
	TEXICMD_ASTERISK,
	TEXICMD_AT,
	TEXICMD_AUTHOR,
	TEXICMD_BANG,
	TEXICMD_BULLET,
	TEXICMD_BYE,
	TEXICMD_CENTER,
	TEXICMD_CHAPTER,
	TEXICMD_CINDEX,
	TEXICMD_CITE,
	TEXICMD_CODE,
	TEXICMD_COLON,
	TEXICMD_COMMAND,
	TEXICMD_COMMENT,
	TEXICMD_COMMENT_LONG,
	TEXICMD_CONTENTS,
	TEXICMD_COPYING,
	TEXICMD_COPYRIGHT,
	TEXICMD_DEFTYPEFN,
	TEXICMD_DEFTYPEFNX,
	TEXICMD_DEFTYPEFUN,
	TEXICMD_DEFTYPEFUNX,
	TEXICMD_DEFTYPEVAR,
	TEXICMD_DEFTYPEVR,
	TEXICMD_DETAILMENU,
	TEXICMD_DFN,
	TEXICMD_DIRCATEGORY,
	TEXICMD_DIRENTRY,
	TEXICMD_DISPLAY,
	TEXICMD_DOTS,
	TEXICMD_EMAIL,
	TEXICMD_EMPH,
	TEXICMD_END,
	TEXICMD_ENUMERATE,
	TEXICMD_ENV,
	TEXICMD_EXAMPLE,
	TEXICMD_FILE,
	TEXICMD_GROUP,
	TEXICMD_HEADING,
	TEXICMD_HEADINGS,
	TEXICMD_HYPHEN,
	TEXICMD_I,
	TEXICMD_IFCLEAR,
	TEXICMD_IFHTML,
	TEXICMD_IFINFO,
	TEXICMD_IFNOTTEX,
	TEXICMD_IFTEX,
	TEXICMD_IFSET,
	TEXICMD_IMAGE,
	TEXICMD_INCLUDE,
	TEXICMD_INSERTCOPYING,
	TEXICMD_ITEM,
	TEXICMD_ITEMIZE,
	TEXICMD_KBD,
	TEXICMD_LATEX,
	TEXICMD_MATH,
	TEXICMD_MENU,
	TEXICMD_NEWLINE,
	TEXICMD_NODE,
	TEXICMD_NOINDENT,
	TEXICMD_PXREF,
	TEXICMD_QUESTIONMARK,
	TEXICMD_QUOTATION,
	TEXICMD_PAGE,
	TEXICMD_PARINDENT,
	TEXICMD_PRINTINDEX,
	TEXICMD_REF,
	TEXICMD_SAMP,
	TEXICMD_SC,
	TEXICMD_SECTION,
	TEXICMD_SET,
	TEXICMD_SETCHAPNEWPAGE,
	TEXICMD_SETFILENAME,
	TEXICMD_SETTITLE,
	TEXICMD_SP,
	TEXICMD_SPACE,
	TEXICMD_SMALLEXAMPLE,
	TEXICMD_SQUIGGLE_LEFT,
	TEXICMD_SQUIGGLE_RIGHT,
	TEXICMD_STRONG,
	TEXICMD_SUBSECTION,
	TEXICMD_SUBTITLE,
	TEXICMD_TAB,
	TEXICMD_TABLE,
	TEXICMD_TEX,
	TEXICMD_TEXSYM,
	TEXICMD_TITLE,
	TEXICMD_TITLEFONT,
	TEXICMD_TITLEPAGE,
	TEXICMD_TOP,
	TEXICMD_UNNUMBERED,
	TEXICMD_UNNUMBEREDSEC,
	TEXICMD_UREF,
	TEXICMD_URL,
	TEXICMD_VAR,
	TEXICMD_VSKIP,
	TEXICMD_W,
	TEXICMD_XREF,
	TEXICMD__MAX	/* command count; also means "no such command" */
};
144:
/*
 * One input file that is being parsed, together with the parser's
 * current position inside it.
 */
struct texifile {
	const char	*name;	/* file name */
	size_t		 line;	/* current line, counted from zero */
	size_t		 col;	/* current column in line, from zero */
	char		*map;	/* mmap'd file contents */
	size_t		 mapsz;	/* size of the mapping */
};
156:
struct texi;

/*
 * Signature shared by all functions that implement a texi command.
 * The arguments are the parse state, the command that was matched, the
 * input buffer with its size, and the current position in that buffer
 * (passed by pointer).
 */
typedef void (*texicmdfp)(struct texi *p, enum texicmd cmd,
	const char *buf, size_t sz, size_t *pos);
164:
165: /*
166: * Describes Texinfo commands, whether native or overriden.
167: */
168: struct texitok {
169: texicmdfp fp; /* callback (or NULL if none) */
170: const char *tok; /* name of the token */
171: size_t len; /* strlen(tok) */
172: };
173:
/*
 * Kind of list currently being processed (see "list" in struct texi).
 * NOTE(review): the exact meaning of ITEM versus NOITEM is defined by
 * the list handlers, which are not shown here; confirm there.
 */
enum texilist {
	TEXILIST_NONE = 0,
	TEXILIST_ITEM = 1,
	TEXILIST_NOITEM = 2
};
179:
1.1 kristaps 180: /*
181: * The main parse structure.
182: * This keeps any necessary information handy.
183: */
184: struct texi {
1.5 kristaps 185: struct texifile files[64]; /* stack of open files */
186: size_t filepos; /* number of open files */
187: size_t outcol; /* column in output line */
188: char **dirs; /* texi directories */
189: size_t dirsz; /* number of texi directories */
1.8 kristaps 190: enum texilist list; /* current list (set recursively) */
191: int outmacro; /* if >0, output is in line macro */
192: int seenws; /* ws has been seen (and ignored) */
193: int ign; /* if >0, don't print anything */
194: int literal; /* if >0, literal context */
1.10 ! kristaps 195: char *title; /* title of document */
! 196: char *subtitle; /* subtitle of document */
1.1 kristaps 197: };
198:
/*
 * Character classification helpers.
 * These are macros that mention their argument more than once, so only
 * pass expressions without side effects.
 */
/* FIXME: FIND A BETTER WAY. */
#define	ismpunct(_x) \
	((_x) == '.' || (_x) == ',' || (_x) == ';')
/* Texi disregards spaces and tabs. */
#define	isws(_x) \
	((_x) == ' ' || (_x) == '\t')
/* Blank as above, or a newline. */
#define	ismspace(_x) \
	((_x) == '\n' || isws((_x)))
1.1 kristaps 209:
/*
 * Command handlers, all sharing the texicmdfp signature.
 * Listed alphabetically.
 */
static void	doblock(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dobracket(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dobye(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dochapter(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	docommand(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dodeftypefun(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dodeftypevar(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dodisplay(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doemph(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doenumerate(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doenv(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doexample(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dofile(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doignargn(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doignblock(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doignbracket(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doignline(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doinclude(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doitalic(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doitem(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doitemize(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dolink(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doliteral(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	domath(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doquotation(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dosection(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dosp(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dosubsection(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dosymbol(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dotable(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dotitle(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dotop(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
1.1 kristaps 242:
243: static const struct texitok texitoks[TEXICMD__MAX] = {
1.8 kristaps 244: { doignargn, "acronym", 7 }, /* TEXICMD_ACRONYM */
1.1 kristaps 245: { doignline, "afourpaper", 10 }, /* TEXICMD_A4PAPER */
246: { doignbracket, "anchor", 6 }, /* TEXICMD_ANCHOR */
1.3 kristaps 247: { dochapter, "appendix", 8 }, /* TEXICMD_APPENDIX */
248: { dochapter, "appendixsec", 11 }, /* TEXICMD_APPENDIXSEC */
249: { dosymbol, "*", 1 }, /* TEXICMD_ASTERISK */
1.1 kristaps 250: { dosymbol, "@", 1 }, /* TEXICMD_AT */
1.3 kristaps 251: { doignline, "author", 6 }, /* TEXICMD_AUTHOR */
252: { dosymbol, "!", 1 }, /* TEXICMD_BANG */
1.7 kristaps 253: { dosymbol, "bullet", 6 }, /* TEXICMD_BULLET */
1.1 kristaps 254: { dobye, "bye", 3 }, /* TEXICMD_BYE */
1.5 kristaps 255: { doignline, "center", 5 }, /* TEXICMD_CENTER */
1.3 kristaps 256: { dochapter, "chapter", 7 }, /* TEXICMD_CHAPTER */
1.1 kristaps 257: { doignline, "cindex", 6 }, /* TEXICMD_CINDEX */
258: { doliteral, "code", 4 }, /* TEXICMD_CODE */
1.3 kristaps 259: { doitalic, "cite", 4 }, /* TEXICMD_CITE */
260: { dosymbol, ":", 1 }, /* TEXICMD_COLON */
1.1 kristaps 261: { docommand, "command", 7 }, /* TEXICMD_COMMAND */
262: { doignline, "c", 1 }, /* TEXICMD_COMMENT */
1.2 kristaps 263: { doignline, "comment", 7 }, /* TEXICMD_COMMENT_LONG */
1.1 kristaps 264: { doignline, "contents", 8 }, /* TEXICMD_CONTENTS */
265: { doignblock, "copying", 7 }, /* TEXICMD_COPYING */
266: { dosymbol, "copyright", 9 }, /* TEXICMD_COPYRIGHT */
1.3 kristaps 267: { dodeftypefun, "deftypefn", 9 }, /* TEXICMD_DEFTYPEFN */
268: { dodeftypefun, "deftypefnx", 10 }, /* TEXICMD_DEFTYPEFNX */
269: { dodeftypefun, "deftypefun", 10 }, /* TEXICMD_DEFTYPEFUN */
270: { dodeftypefun, "deftypefunx", 11 }, /* TEXICMD_DEFTYPEFUNX */
271: { dodeftypevar, "deftypevar", 10 }, /* TEXICMD_DEFTYPEVAR */
272: { dodeftypevar, "deftypevr", 9 }, /* TEXICMD_DEFTYPEVR */
1.1 kristaps 273: { doignblock, "detailmenu", 10 }, /* TEXICMD_DETAILMENU */
1.3 kristaps 274: { doitalic, "dfn", 3 }, /* TEXICMD_DFN */
1.1 kristaps 275: { doignline, "dircategory", 11 }, /* TEXICMD_DIRCATEGORY */
276: { doignblock, "direntry", 8 }, /* TEXICMD_DIRENTRY */
1.3 kristaps 277: { dodisplay, "display", 7 }, /* TEXICMD_DISPLAY */
1.2 kristaps 278: { dosymbol, "dots", 4 }, /* TEXICMD_DOTS */
1.8 kristaps 279: { dolink, "email", 5 }, /* TEXICMD_EMAIL */
1.1 kristaps 280: { doemph, "emph", 4 }, /* TEXICMD_EMPH */
281: { NULL, "end", 3 }, /* TEXICMD_END */
1.2 kristaps 282: { doenumerate, "enumerate", 9 }, /* TEXICMD_ENUMERATE */
1.3 kristaps 283: { doenv, "env", 3 }, /* TEXICMD_ENV */
1.1 kristaps 284: { doexample, "example", 7 }, /* TEXICMD_EXAMPLE */
285: { dofile, "file", 4 }, /* TEXICMD_FILE */
1.3 kristaps 286: { doblock, "group", 5 }, /* TEXICMD_GROUP */
1.2 kristaps 287: { dosection, "heading", 7 }, /* TEXICMD_HEADING */
1.3 kristaps 288: { doignline, "headings", 8 }, /* TEXICMD_HEADINGS */
289: { dosymbol, "-", 1 }, /* TEXICMD_HYPHEN */
1.1 kristaps 290: { doitalic, "i", 1 }, /* TEXICMD_I */
1.3 kristaps 291: { doignblock, "ifclear", 7 }, /* TEXICMD_IFCLEAR */
1.1 kristaps 292: { doignblock, "ifhtml", 6 }, /* TEXICMD_IFHTML */
1.3 kristaps 293: { doignblock, "ifinfo", 6 }, /* TEXICMD_IFINFO */
294: { doblock, "ifnottex", 8 }, /* TEXICMD_IFNOTTEX */
1.1 kristaps 295: { doignblock, "iftex", 5 }, /* TEXICMD_IFTEX */
1.3 kristaps 296: { doignblock, "ifset", 5 }, /* TEXICMD_IFSET */
1.1 kristaps 297: { doignbracket, "image", 5 }, /* TEXICMD_IMAGE */
1.2 kristaps 298: { doinclude, "include", 7 }, /* TEXICMD_INCLUDE */
1.5 kristaps 299: { doignline, "insertcopying", 13 }, /* TEXICMD_INSERTCOPYING */
1.1 kristaps 300: { doitem, "item", 4 }, /* TEXICMD_ITEM */
301: { doitemize, "itemize", 7 }, /* TEXICMD_ITEMIZE */
302: { doliteral, "kbd", 3 }, /* TEXICMD_KBD */
303: { dosymbol, "LaTeX", 5 }, /* TEXICMD_LATEX */
1.3 kristaps 304: { domath, "math", 4 }, /* TEXICMD_MATH */
1.1 kristaps 305: { doignblock, "menu", 4 }, /* TEXICMD_MENU */
1.3 kristaps 306: { dosymbol, "\n", 1 }, /* TEXICMD_NEWLINE */
1.1 kristaps 307: { doignline, "node", 4 }, /* TEXICMD_NODE */
1.3 kristaps 308: { doignline, "noindent", 8 }, /* TEXICMD_NOINDENT */
1.8 kristaps 309: { dolink, "pxref", 5 }, /* TEXICMD_PXREF */
1.3 kristaps 310: { dosymbol, "?", 1 }, /* TEXICMD_QUESTIONMARK */
1.1 kristaps 311: { doquotation, "quotation", 9 }, /* TEXICMD_QUOTATION */
1.3 kristaps 312: { doignline, "page", 4 }, /* TEXICMD_PAGE */
313: { doignline, "paragraphindent", 14 }, /* TEXICMD_PARINDENT */
1.2 kristaps 314: { doignline, "printindex", 10 }, /* TEXICMD_PRINTINDEX */
1.1 kristaps 315: { dobracket, "ref", 3 }, /* TEXICMD_REF */
316: { doliteral, "samp", 4 }, /* TEXICMD_SAMP */
1.7 kristaps 317: { dobracket, "sc", 2 }, /* TEXICMD_SC */
1.1 kristaps 318: { dosection, "section", 7 }, /* TEXICMD_SECTION */
1.3 kristaps 319: { doignline, "set", 3 }, /* TEXICMD_SET */
1.1 kristaps 320: { doignline, "setchapternewpage", 17 }, /* TEXICMD_SETCHAPNEWPAGE */
321: { doignline, "setfilename", 11 }, /* TEXICMD_SETFILENAME */
1.10 ! kristaps 322: { dotitle, "settitle", 8 }, /* TEXICMD_SETTITLE */
1.3 kristaps 323: { dosp, "sp", 2 }, /* TEXICMD_SP */
324: { dosymbol, " ", 1 }, /* TEXICMD_SPACE */
325: { doexample, "smallexample", 12 }, /* TEXICMD_SMALLEXAMPLE */
326: { dosymbol, "{", 1 }, /* TEXICMD_SQUIGGLE_LEFT */
327: { dosymbol, "}", 1 }, /* TEXICMD_SQUIGGLE_RIGHT */
1.8 kristaps 328: { doemph, "strong", 6 }, /* TEXICMD_STRONG */
1.1 kristaps 329: { dosubsection, "subsection", 10 }, /* TEXICMD_SUBSECTION */
1.3 kristaps 330: { doignline, "subtitle", 8 }, /* TEXICMD_SUBTITLE */
331: { dosymbol, "\t", 1 }, /* TEXICMD_TAB */
1.1 kristaps 332: { dotable, "table", 5 }, /* TEXICMD_TABLE */
333: { doignblock, "tex", 3 }, /* TEXICMD_TEX */
334: { dosymbol, "TeX", 3 }, /* TEXICMD_TEXSYM */
1.3 kristaps 335: { doignline, "title", 5 }, /* TEXICMD_TITLE */
1.1 kristaps 336: { dobracket, "titlefont", 9 }, /* TEXICMD_TITLEFONT */
337: { doignblock, "titlepage", 9 }, /* TEXICMD_TITLEPAGE */
338: { dotop, "top", 3 }, /* TEXICMD_TOP */
1.3 kristaps 339: { dochapter, "unnumbered", 10 }, /* TEXICMD_UNNUMBERED */
1.2 kristaps 340: { dosection, "unnumberedsec", 13 }, /* TEXICMD_UNNUMBEREDSEC */
1.8 kristaps 341: { dolink, "uref", 4 }, /* TEXICMD_UREF */
342: { dolink, "url", 3 }, /* TEXICMD_URL */
1.1 kristaps 343: { doliteral, "var", 3 }, /* TEXICMD_VAR */
1.9 kristaps 344: { dosp, "vskip", 5 }, /* TEXICMD_VSKIP */
1.3 kristaps 345: { dobracket, "w", 1 }, /* TEXICMD_W */
1.8 kristaps 346: { dolink, "xref", 4 }, /* TEXICMD_XREF */
1.1 kristaps 347: };
348:
1.2 kristaps 349: /*
350: * Unmap the top-most file that we're using.
351: */
1.1 kristaps 352: static void
353: texifilepop(struct texi *p)
354: {
355: struct texifile *f;
356:
357: assert(p->filepos > 0);
358: f = &p->files[--p->filepos];
359: munmap(f->map, f->mapsz);
360: }
361:
1.2 kristaps 362: /*
1.8 kristaps 363: * Unmap all files that we're currently using and free all resources
364: * that we've allocated during the parse.
1.2 kristaps 365: * The utility should exit(...) after this is called.
366: */
1.1 kristaps 367: static void
368: texiexit(struct texi *p)
369: {
1.5 kristaps 370: size_t i;
371:
372: if (p->outcol)
373: putchar('\n');
1.1 kristaps 374:
375: while (p->filepos > 0)
376: texifilepop(p);
1.5 kristaps 377:
378: for (i = 0; i < p->dirsz; i++)
379: free(p->dirs[i]);
1.10 ! kristaps 380:
1.5 kristaps 381: free(p->dirs);
1.10 ! kristaps 382: free(p->subtitle);
! 383: free(p->title);
1.1 kristaps 384: }
385:
/*
 * Die on a system error.
 * "errstring" is handed to perror(3), so errno must still describe the
 * failure when this is called; resources are then released with
 * texiexit() and the process exits with failure.
 * Does not return.
 */
static void
texiabort(struct texi *p, const char *errstring)
{

	/* Report first: the clean-up below may change errno. */
	perror(errstring);

	texiexit(p);
	exit(EXIT_FAILURE);
}
398:
399: /*
400: * Print a generic warning message (to stderr) tied to our current
401: * location in the parse sequence.
402: */
403: static void
404: texiwarn(const struct texi *p, const char *fmt, ...)
405: {
406: va_list ap;
407:
1.2 kristaps 408: fprintf(stderr, "%s:%zu:%zu: warning: ",
1.1 kristaps 409: p->files[p->filepos - 1].name,
410: p->files[p->filepos - 1].line + 1,
411: p->files[p->filepos - 1].col + 1);
412: va_start(ap, fmt);
413: vfprintf(stderr, fmt, ap);
414: va_end(ap);
415: fputc('\n', stderr);
416: }
417:
1.8 kristaps 418: /*
419: * Print an error message (to stderr) tied to our current location in
420: * the parse sequence, invoke texiexit(), then die.
421: */
1.2 kristaps 422: static void
423: texierr(struct texi *p, const char *fmt, ...)
424: {
425: va_list ap;
426:
427: fprintf(stderr, "%s:%zu:%zu: error: ",
428: p->files[p->filepos - 1].name,
429: p->files[p->filepos - 1].line + 1,
430: p->files[p->filepos - 1].col + 1);
431: va_start(ap, fmt);
432: vfprintf(stderr, fmt, ap);
433: va_end(ap);
434: fputc('\n', stderr);
435: texiexit(p);
436: exit(EXIT_FAILURE);
437: }
438:
1.1 kristaps 439: /*
1.8 kristaps 440: * Put a single data character to the output if we're not ignoring.
441: * Adjusts our output status.
1.1 kristaps 442: */
443: static void
444: texiputchar(struct texi *p, char c)
445: {
446:
1.3 kristaps 447: if (p->ign)
1.1 kristaps 448: return;
449: putchar(c);
450: if ('\n' == c) {
451: p->outcol = 0;
452: p->seenws = 0;
453: } else
454: p->outcol++;
455: }
456:
/*
 * Write a NUL-terminated string, one character at a time, through
 * texiputchar().
 */
static void
texiputchars(struct texi *p, const char *s)
{
	const char	*cp;

	for (cp = s; '\0' != *cp; cp++)
		texiputchar(p, *cp);
}
467:
468: /*
1.8 kristaps 469: * Close an mdoc(7) macro opened with teximacroopen().
470: * If there are no more macros on the line, prints a newline.
1.1 kristaps 471: */
472: static void
1.3 kristaps 473: teximacroclose(struct texi *p)
474: {
475:
1.8 kristaps 476: /* FIXME: punctuation. */
1.5 kristaps 477: if (0 == --p->outmacro)
478: texiputchar(p, '\n');
1.3 kristaps 479: }
480:
481: /*
1.8 kristaps 482: * Open a mdoc(7) macro.
483: * This is used for line macros, e.g., Qq [foo bar baz].
484: * It can be invoked for nested macros, e.g., Qq Li foo .
1.3 kristaps 485: */
486: static void
487: teximacroopen(struct texi *p, const char *s)
1.1 kristaps 488: {
489:
1.5 kristaps 490: if (p->outcol && 0 == p->outmacro)
491: texiputchar(p, '\n');
492: if (0 == p->outmacro)
493: texiputchar(p, '.');
494: else
495: texiputchar(p, ' ');
496: texiputchars(p, s);
497: texiputchar(p, ' ');
1.3 kristaps 498: p->outmacro++;
1.5 kristaps 499: p->seenws = 0;
1.1 kristaps 500: }
501:
502: /*
1.8 kristaps 503: * Put a stadnalone mdoc(7) command with the trailing newline.
1.1 kristaps 504: */
505: static void
506: teximacro(struct texi *p, const char *s)
507: {
508:
1.4 kristaps 509: if (p->outmacro)
510: texierr(p, "\"%s\" in open line scope!?", s);
511: else if (p->literal)
512: texierr(p, "\"%s\" in a literal scope!?", s);
513:
1.1 kristaps 514: if (p->outcol)
515: texiputchar(p, '\n');
1.5 kristaps 516:
517: texiputchar(p, '.');
518: texiputchars(p, s);
519: texiputchar(p, '\n');
1.1 kristaps 520: }
521:
522: /*
523: * Advance by a single byte in the input stream.
524: */
525: static void
526: advance(struct texi *p, const char *buf, size_t *pos)
527: {
528:
529: if ('\n' == buf[*pos]) {
530: p->files[p->filepos - 1].line++;
531: p->files[p->filepos - 1].col = 0;
532: } else
533: p->files[p->filepos - 1].col++;
534:
535: (*pos)++;
536: }
537:
538: /*
539: * Advance to the next non-whitespace word in the input stream.
540: * If we're in literal mode, then print all of the whitespace as we're
541: * doing so.
542: */
543: static size_t
544: advancenext(struct texi *p, const char *buf, size_t sz, size_t *pos)
545: {
546:
1.3 kristaps 547: if (p->literal) {
1.9 kristaps 548: while (*pos < sz && ismspace(buf[*pos])) {
1.5 kristaps 549: if (*pos && '\n' == buf[*pos] &&
550: '\\' == buf[*pos - 1])
551: texiputchar(p, 'e');
1.1 kristaps 552: texiputchar(p, buf[*pos]);
553: advance(p, buf, pos);
554: }
555: return(*pos);
556: }
557:
1.9 kristaps 558: while (*pos < sz && ismspace(buf[*pos])) {
1.1 kristaps 559: p->seenws = 1;
560: /*
561: * If it looks like we've printed a double-line, then
562: * output a paragraph.
563: * FIXME: this is stupid.
564: */
1.5 kristaps 565: if (*pos && '\n' == buf[*pos] &&
566: '\n' == buf[*pos - 1])
567: teximacro(p, "Pp");
1.1 kristaps 568: advance(p, buf, pos);
569: }
570: return(*pos);
571: }
572:
/*
 * Skip to the end of the current input line and return the new
 * position.
 * If "consumenl" is non-zero, the newline itself is skipped as well.
 */
static size_t
advanceeoln(struct texi *p, const char *buf,
	size_t sz, size_t *pos, int consumenl)
{

	/* FIXME: disregards @NEWLINE. */
	for ( ; *pos < sz; advance(p, buf, pos))
		if ('\n' == buf[*pos])
			break;

	if (consumenl && *pos < sz)
		advance(p, buf, pos);

	return(*pos);
}
588:
/*
 * Step forward, one byte at a time, until the absolute buffer position
 * "end" is reached.
 * "end" must not be before the current position.
 */
static void
advanceto(struct texi *p, const char *buf, size_t *pos, size_t end)
{

	assert(end >= *pos);

	for ( ; *pos < end; )
		advance(p, buf, pos);
}
601:
602: /*
603: * Output a free-form word in the input stream, progressing to the next
604: * command or white-space.
605: * This also will advance the input stream.
606: */
607: static void
1.8 kristaps 608: texiword(struct texi *p, const char *buf,
609: size_t sz, size_t *pos, char extra)
1.1 kristaps 610: {
611:
1.3 kristaps 612: if (0 == p->outmacro && p->outcol > 72 && 0 == p->literal)
1.1 kristaps 613: texiputchar(p, '\n');
1.8 kristaps 614: /* FIXME: abstract this: we use it elsewhere. */
1.3 kristaps 615: if (p->seenws && p->outcol && 0 == p->literal)
1.1 kristaps 616: texiputchar(p, ' ');
617:
618: p->seenws = 0;
619:
1.9 kristaps 620: while (*pos < sz && ! ismspace(buf[*pos])) {
1.1 kristaps 621: switch (buf[*pos]) {
622: case ('@'):
623: case ('}'):
624: case ('{'):
625: return;
626: }
1.8 kristaps 627: if ('\0' != extra && buf[*pos] == extra)
628: return;
1.1 kristaps 629: if (*pos < sz - 1 &&
630: '`' == buf[*pos] &&
631: '`' == buf[*pos + 1]) {
632: texiputchars(p, "\\(lq");
633: advance(p, buf, pos);
634: } else if (*pos < sz - 1 &&
635: '\'' == buf[*pos] &&
636: '\'' == buf[*pos + 1]) {
637: texiputchars(p, "\\(rq");
638: advance(p, buf, pos);
639: } else
640: texiputchar(p, buf[*pos]);
641: advance(p, buf, pos);
642: }
643: }
644:
1.8 kristaps 645: /*
646: * Look up the command at position "pos" in the buffer, returning it (or
647: * TEXICMD__MAX if none found) and setting "end" to be the absolute
648: * index after the command name.
649: */
1.1 kristaps 650: static enum texicmd
651: texicmd(struct texi *p, const char *buf,
652: size_t pos, size_t sz, size_t *end)
653: {
654: size_t i, len;
655:
656: assert('@' == buf[pos]);
1.3 kristaps 657:
1.9 kristaps 658: if ((*end = pos) == sz)
659: return(TEXICMD__MAX);
660: else if ((*end = ++pos) == sz)
1.3 kristaps 661: return(TEXICMD__MAX);
662:
663: /* Alphabetic commands are special. */
664: if ( ! isalpha(buf[pos])) {
1.9 kristaps 665: if ((*end = pos + 1) == sz)
666: return(TEXICMD__MAX);
1.3 kristaps 667: for (i = 0; i < TEXICMD__MAX; i++) {
668: if (1 != texitoks[i].len)
669: continue;
670: if (0 == strncmp(texitoks[i].tok, &buf[pos], 1))
671: return(i);
672: }
673: texiwarn(p, "bad command: @%c", buf[pos]);
674: return(TEXICMD__MAX);
675: }
676:
1.9 kristaps 677: for (*end = pos; *end < sz && ! ismspace(buf[*end]); (*end)++)
1.3 kristaps 678: if ((*end > pos && ('@' == buf[*end] ||
679: '{' == buf[*end] || '}' == buf[*end])))
1.1 kristaps 680: break;
681:
682: len = *end - pos;
683: for (i = 0; i < TEXICMD__MAX; i++) {
684: if (len != texitoks[i].len)
685: continue;
686: if (0 == strncmp(texitoks[i].tok, &buf[pos], len))
687: return(i);
688: }
689:
1.3 kristaps 690: texiwarn(p, "bad command: @%.*s", (int)len, &buf[pos]);
1.1 kristaps 691: return(TEXICMD__MAX);
692: }
693:
1.8 kristaps 694: /*
695: * Parse an argument from a bracketed command, e.g., @url{foo, baz}.
696: * Num should be set to the argument we're currently parsing, although
697: * it suffixes for it to be zero or non-zero.
698: * This will return 1 if there are more arguments, 0 otherwise.
699: * This will stop (returning 0) in the event of EOF or if we're not at a
700: * bracket for the zeroth parse.
701: */
702: static int
703: parsearg(struct texi *p, const char *buf,
704: size_t sz, size_t *pos, size_t num)
705: {
706: size_t end;
707: enum texicmd cmd;
708:
1.9 kristaps 709: while (*pos < sz && ismspace(buf[*pos]))
1.8 kristaps 710: advance(p, buf, pos);
711: if (*pos == sz || (0 == num && '{' != buf[*pos]))
712: return(0);
713: if (0 == num)
714: advance(p, buf, pos);
715:
716: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
717: switch (buf[*pos]) {
718: case (','):
719: advance(p, buf, pos);
720: return(1);
721: case ('}'):
722: advance(p, buf, pos);
723: return(0);
724: case ('{'):
725: if (0 == p->ign)
726: texiwarn(p, "unexpected \"{\"");
727: advance(p, buf, pos);
728: continue;
729: case ('@'):
730: break;
731: default:
732: texiword(p, buf, sz, pos, ',');
733: continue;
734: }
735:
736: cmd = texicmd(p, buf, *pos, sz, &end);
737: advanceto(p, buf, pos, end);
738: if (TEXICMD__MAX == cmd)
739: continue;
740: if (NULL != texitoks[cmd].fp)
741: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
742: }
743: return(0);
744: }
745:
746: /*
747: * Parse until the end of a bracketed statement, e.g., @foo{bar baz}.
748: * This will stop in the event of EOF or if we're not at a bracket.
749: */
1.1 kristaps 750: static void
751: parsebracket(struct texi *p, const char *buf, size_t sz, size_t *pos)
752: {
753: size_t end;
754: enum texicmd cmd;
755:
1.9 kristaps 756: while (*pos < sz && ismspace(buf[*pos]))
1.3 kristaps 757: advance(p, buf, pos);
758:
1.1 kristaps 759: if (*pos == sz || '{' != buf[*pos])
760: return;
761: advance(p, buf, pos);
762:
763: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
764: switch (buf[*pos]) {
765: case ('}'):
766: advance(p, buf, pos);
767: return;
768: case ('{'):
1.3 kristaps 769: if (0 == p->ign)
770: texiwarn(p, "unexpected \"{\"");
771: advance(p, buf, pos);
772: continue;
773: case ('@'):
774: break;
775: default:
1.8 kristaps 776: texiword(p, buf, sz, pos, '\0');
1.3 kristaps 777: continue;
778: }
779:
780: cmd = texicmd(p, buf, *pos, sz, &end);
781: advanceto(p, buf, pos, end);
782: if (TEXICMD__MAX == cmd)
783: continue;
784: if (NULL != texitoks[cmd].fp)
785: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
786: }
787: }
788:
789: /*
790: * This should be invoked when we're on a macro line and want to process
791: * to the end of the current input line, doing all of our macros along
792: * the way.
793: */
794: static void
795: parseeoln(struct texi *p, const char *buf, size_t sz, size_t *pos)
796: {
797: size_t end;
798: enum texicmd cmd;
799:
800: assert(0 == p->literal);
801:
802: while (*pos < sz && '\n' != buf[*pos]) {
803: while (*pos < sz && isws(buf[*pos])) {
804: p->seenws = 1;
805: advance(p, buf, pos);
806: }
807: switch (buf[*pos]) {
808: case ('}'):
809: if (0 == p->ign)
810: texiwarn(p, "unexpected \"}\"");
811: advance(p, buf, pos);
812: continue;
813: case ('{'):
814: if (0 == p->ign)
815: texiwarn(p, "unexpected \"{\"");
1.1 kristaps 816: advance(p, buf, pos);
817: continue;
818: case ('@'):
819: break;
820: default:
1.8 kristaps 821: texiword(p, buf, sz, pos, '\0');
1.1 kristaps 822: continue;
823: }
824:
825: cmd = texicmd(p, buf, *pos, sz, &end);
826: advanceto(p, buf, pos, end);
827: if (TEXICMD__MAX == cmd)
828: continue;
829: if (NULL != texitoks[cmd].fp)
830: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
831: }
832: }
833:
1.8 kristaps 834: /*
835: * Parse a single word or command.
836: * This will return immediately at the EOF.
837: */
1.1 kristaps 838: static void
1.3 kristaps 839: parsesingle(struct texi *p, const char *buf, size_t sz, size_t *pos)
840: {
841: size_t end;
842: enum texicmd cmd;
843:
844: if ((*pos = advancenext(p, buf, sz, pos)) >= sz)
845: return;
846:
847: switch (buf[*pos]) {
848: case ('}'):
849: if (0 == p->ign)
850: texiwarn(p, "unexpected \"}\"");
851: advance(p, buf, pos);
852: return;
853: case ('{'):
854: if (0 == p->ign)
855: texiwarn(p, "unexpected \"{\"");
856: advance(p, buf, pos);
857: return;
858: case ('@'):
859: break;
860: default:
1.8 kristaps 861: texiword(p, buf, sz, pos, '\0');
1.3 kristaps 862: return;
863: }
864:
865: cmd = texicmd(p, buf, *pos, sz, &end);
866: advanceto(p, buf, pos, end);
867: if (TEXICMD__MAX == cmd)
868: return;
869: if (NULL != texitoks[cmd].fp)
870: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
871: }
872:
/*
 * Parse the whole buffer, one word or command at a time, from its
 * start to its end.
 */
static void
parseeof(struct texi *p, const char *buf, size_t sz)
{
	size_t	 off;

	off = 0;
	while (off < sz)
		parsesingle(p, buf, sz, &off);
}
884:
1.8 kristaps 885: /*
886: * Parse a block sequence until we have the "@end endtoken" command
887: * invocation.
888: * This will return immediately at EOF.
889: */
1.7 kristaps 890: static void
1.1 kristaps 891: parseto(struct texi *p, const char *buf,
892: size_t sz, size_t *pos, const char *endtoken)
893: {
894: size_t end;
895: enum texicmd cmd;
896: size_t endtoksz;
897:
898: endtoksz = strlen(endtoken);
899: assert(endtoksz > 0);
900:
901: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
902: switch (buf[*pos]) {
903: case ('}'):
1.3 kristaps 904: if (0 == p->ign)
905: texiwarn(p, "unexpected \"}\"");
1.1 kristaps 906: advance(p, buf, pos);
907: continue;
908: case ('{'):
1.3 kristaps 909: if (0 == p->ign)
910: texiwarn(p, "unexpected \"{\"");
1.1 kristaps 911: advance(p, buf, pos);
912: continue;
913: case ('@'):
914: break;
915: default:
1.8 kristaps 916: texiword(p, buf, sz, pos, '\0');
1.1 kristaps 917: continue;
918: }
919:
920: cmd = texicmd(p, buf, *pos, sz, &end);
921: advanceto(p, buf, pos, end);
922: if (TEXICMD_END == cmd) {
1.2 kristaps 923: while (*pos < sz && isws(buf[*pos]))
1.1 kristaps 924: advance(p, buf, pos);
925: /*
1.8 kristaps 926: * FIXME: check the full word, not just its
927: * initial substring!
1.1 kristaps 928: */
929: if (sz - *pos >= endtoksz && 0 == strncmp
930: (&buf[*pos], endtoken, endtoksz)) {
1.3 kristaps 931: advanceeoln(p, buf, sz, pos, 0);
1.1 kristaps 932: break;
933: }
1.3 kristaps 934: if (0 == p->ign)
935: texiwarn(p, "unexpected \"end\"");
936: advanceeoln(p, buf, sz, pos, 0);
1.1 kristaps 937: continue;
938: } else if (TEXICMD__MAX != cmd)
939: if (NULL != texitoks[cmd].fp)
940: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
941: }
942: }
943:
1.8 kristaps 944: /*
945: * Memory-map the file "fname" and begin parsing it.
946: * This can be called in a nested context.
947: */
1.1 kristaps 948: static void
1.2 kristaps 949: parsefile(struct texi *p, const char *fname)
950: {
951: struct texifile *f;
952: int fd;
953: struct stat st;
954:
955: assert(p->filepos < 64);
956: f = &p->files[p->filepos];
957: memset(f, 0, sizeof(struct texifile));
958:
959: f->name = fname;
960: if (-1 == (fd = open(fname, O_RDONLY, 0))) {
961: texiabort(p, fname);
962: } else if (-1 == fstat(fd, &st)) {
963: close(fd);
964: texiabort(p, fname);
965: }
966:
967: f->mapsz = st.st_size;
968: f->map = mmap(NULL, f->mapsz,
969: PROT_READ, MAP_SHARED, fd, 0);
970: close(fd);
971:
972: if (MAP_FAILED == f->map)
973: texiabort(p, fname);
974:
975: p->filepos++;
976: parseeof(p, f->map, f->mapsz);
977: texifilepop(p);
978: }
979:
980: static void
1.3 kristaps 981: dodeftypevar(struct texi *p, enum texicmd cmd,
982: const char *buf, size_t sz, size_t *pos)
983: {
984: const char *blk;
985:
986: blk = TEXICMD_DEFTYPEVR == cmd ?
987: "deftypevr" : "deftypevar";
988:
989: if (p->ign) {
990: parseto(p, buf, sz, pos, blk);
991: return;
992: }
993:
1.5 kristaps 994: teximacro(p, "Pp");
1.3 kristaps 995: if (TEXICMD_DEFTYPEVR == cmd) {
996: parsebracket(p, buf, sz, pos);
997: texiputchars(p, ":\n");
998: }
1.5 kristaps 999: teximacroopen(p, "Vt");
1.4 kristaps 1000: parseeoln(p, buf, sz, pos);
1.3 kristaps 1001: teximacroclose(p);
1.5 kristaps 1002: teximacro(p, "Pp");
1.3 kristaps 1003: parseto(p, buf, sz, pos, blk);
1004: }
1005:
1006: static void
1007: dodeftypefun(struct texi *p, enum texicmd cmd,
1008: const char *buf, size_t sz, size_t *pos)
1009: {
1010: const char *blk;
1011:
1.5 kristaps 1012: blk = NULL;
1.3 kristaps 1013: switch (cmd) {
1014: case (TEXICMD_DEFTYPEFN):
1015: case (TEXICMD_DEFTYPEFUN):
1.5 kristaps 1016: blk = texitoks[cmd].tok;
1.3 kristaps 1017: break;
1.5 kristaps 1018: default:
1.3 kristaps 1019: break;
1020: }
1021:
1022: if (p->ign) {
1023: if (NULL != blk)
1024: parseto(p, buf, sz, pos, blk);
1025: return;
1026: }
1027:
1028: switch (cmd) {
1029: case (TEXICMD_DEFTYPEFN):
1030: case (TEXICMD_DEFTYPEFUN):
1.5 kristaps 1031: teximacro(p, "Pp");
1.3 kristaps 1032: break;
1033: default:
1034: break;
1035: }
1036: if (TEXICMD_DEFTYPEFN == cmd ||
1037: TEXICMD_DEFTYPEFNX == cmd) {
1038: parsebracket(p, buf, sz, pos);
1039: texiputchars(p, ":\n");
1040: }
1.5 kristaps 1041: teximacroopen(p, "Ft");
1.3 kristaps 1042: parsesingle(p, buf, sz, pos);
1043: teximacroclose(p);
1.5 kristaps 1044: teximacroopen(p, "Fn");
1.3 kristaps 1045: parsesingle(p, buf, sz, pos);
1046: teximacroclose(p);
1.5 kristaps 1047: teximacroopen(p, "Li");
1.4 kristaps 1048: parseeoln(p, buf, sz, pos);
1.3 kristaps 1049: teximacroclose(p);
1.5 kristaps 1050: teximacro(p, "Pp");
1.3 kristaps 1051: if (NULL != blk)
1052: parseto(p, buf, sz, pos, blk);
1053: }
1054:
1055: static void
1.1 kristaps 1056: doignblock(struct texi *p, enum texicmd cmd,
1057: const char *buf, size_t sz, size_t *pos)
1058: {
1059:
1.3 kristaps 1060: p->ign++;
1.5 kristaps 1061: parseto(p, buf, sz, pos, texitoks[cmd].tok);
1.3 kristaps 1062: p->ign--;
1.1 kristaps 1063: }
1064:
1065: static void
1.3 kristaps 1066: doblock(struct texi *p, enum texicmd cmd,
1.1 kristaps 1067: const char *buf, size_t sz, size_t *pos)
1068: {
1069:
1.5 kristaps 1070: parseto(p, buf, sz, pos, texitoks[cmd].tok);
1.1 kristaps 1071: }
1072:
1073: static void
1074: doinline(struct texi *p, const char *buf,
1075: size_t sz, size_t *pos, const char *macro)
1076: {
1077:
1.5 kristaps 1078: teximacroopen(p, macro);
1.1 kristaps 1079: p->seenws = 0;
1080: parsebracket(p, buf, sz, pos);
1081: if (*pos < sz - 1 &&
1082: ismpunct(buf[*pos]) &&
1.9 kristaps 1083: ismspace(buf[*pos + 1])) {
1.1 kristaps 1084: texiputchar(p, ' ');
1085: texiputchar(p, buf[*pos]);
1086: advance(p, buf, pos);
1087: }
1.5 kristaps 1088: teximacroclose(p);
1.1 kristaps 1089: }
1090:
1091: static void
1.2 kristaps 1092: doinclude(struct texi *p, enum texicmd cmd,
1093: const char *buf, size_t sz, size_t *pos)
1094: {
1095: char fname[PATH_MAX], path[PATH_MAX];
1096: size_t i;
1097: int rc;
1098:
1099: while (*pos < sz && ' ' == buf[*pos])
1100: advance(p, buf, pos);
1101:
1102: /* Read in the filename. */
1103: for (i = 0; *pos < sz && '\n' != buf[*pos]; i++) {
1104: if (i == sizeof(fname) - 1)
1105: break;
1106: fname[i] = buf[*pos];
1107: advance(p, buf, pos);
1108: }
1109:
1110: if (i == 0)
1111: texierr(p, "path too short");
1112: else if ('\n' != buf[*pos])
1113: texierr(p, "path too long");
1114: else if ('/' == fname[0])
1115: texierr(p, "no absolute paths");
1116: fname[i] = '\0';
1117:
1118: if (strstr(fname, "../") || strstr(fname, "/.."))
1119: texierr(p, "insecure path");
1120:
1.5 kristaps 1121: for (i = 0; i < p->dirsz; i++) {
1122: rc = snprintf(path, sizeof(path),
1123: "%s/%s", p->dirs[i], fname);
1124: if (rc < 0)
1125: texierr(p, "couldn't format path");
1126: else if ((size_t)rc >= sizeof(path))
1127: texierr(p, "path too long");
1128: else if (-1 == access(path, R_OK))
1129: continue;
1130:
1131: parsefile(p, path);
1132: return;
1133: }
1.2 kristaps 1134:
1.5 kristaps 1135: texierr(p, "couldn't find %s in includes", fname);
1.2 kristaps 1136: }
1137:
1138: static void
1.1 kristaps 1139: doitalic(struct texi *p, enum texicmd cmd,
1140: const char *buf, size_t sz, size_t *pos)
1141: {
1142:
1143: texiputchars(p, "\\fI");
1144: parsebracket(p, buf, sz, pos);
1145: texiputchars(p, "\\fP");
1146: }
1147:
1148: static void
1.3 kristaps 1149: doenv(struct texi *p, enum texicmd cmd,
1150: const char *buf, size_t sz, size_t *pos)
1151: {
1152:
1153: if (p->literal)
1154: parsebracket(p, buf, sz, pos);
1155: else
1156: doinline(p, buf, sz, pos, "Ev");
1157: }
1158:
1159: static void
1.1 kristaps 1160: doliteral(struct texi *p, enum texicmd cmd,
1161: const char *buf, size_t sz, size_t *pos)
1162: {
1163:
1.3 kristaps 1164: if (p->literal)
1.1 kristaps 1165: parsebracket(p, buf, sz, pos);
1166: else
1167: doinline(p, buf, sz, pos, "Li");
1168: }
1169:
1170: static void
1171: doemph(struct texi *p, enum texicmd cmd,
1172: const char *buf, size_t sz, size_t *pos)
1173: {
1174:
1.3 kristaps 1175: if (p->literal)
1.1 kristaps 1176: doitalic(p, cmd, buf, sz, pos);
1177: else
1178: doinline(p, buf, sz, pos, "Em");
1179: }
1180:
1181: static void
1182: docommand(struct texi *p, enum texicmd cmd,
1183: const char *buf, size_t sz, size_t *pos)
1184: {
1185:
1186: doinline(p, buf, sz, pos, "Xr");
1187: }
1188:
1189: static void
1190: dobracket(struct texi *p, enum texicmd cmd,
1191: const char *buf, size_t sz, size_t *pos)
1192: {
1193:
1194: parsebracket(p, buf, sz, pos);
1195: }
1196:
1197: static void
1198: dofile(struct texi *p, enum texicmd cmd,
1199: const char *buf, size_t sz, size_t *pos)
1200: {
1201:
1.3 kristaps 1202: if (p->literal)
1.1 kristaps 1203: parsebracket(p, buf, sz, pos);
1204: else
1205: doinline(p, buf, sz, pos, "Pa");
1206: }
1207:
1208: static void
1.3 kristaps 1209: dodisplay(struct texi *p, enum texicmd cmd,
1210: const char *buf, size_t sz, size_t *pos)
1211: {
1212:
1.5 kristaps 1213: teximacro(p, "Bd -display -offset indent");
1.3 kristaps 1214: advanceeoln(p, buf, sz, pos, 1);
1215: parseto(p, buf, sz, pos, "display");
1.5 kristaps 1216: teximacro(p, "Ed");
1.3 kristaps 1217: }
1218:
1219: static void
1.1 kristaps 1220: doexample(struct texi *p, enum texicmd cmd,
1221: const char *buf, size_t sz, size_t *pos)
1222: {
1.3 kristaps 1223: const char *blk;
1224:
1225: blk = TEXICMD_EXAMPLE == cmd ? "example" : "smallexample";
1.1 kristaps 1226:
1.5 kristaps 1227: teximacro(p, "Bd -literal -offset indent");
1.3 kristaps 1228: advanceeoln(p, buf, sz, pos, 1);
1229: p->literal++;
1230: parseto(p, buf, sz, pos, blk);
1231: p->literal--;
1.5 kristaps 1232: teximacro(p, "Ed");
1.1 kristaps 1233: }
1234:
1235: static void
1236: dobye(struct texi *p, enum texicmd cmd,
1237: const char *buf, size_t sz, size_t *pos)
1238: {
1239:
1240: texiexit(p);
1241: exit(EXIT_SUCCESS);
1242: }
1243:
1244: static void
1.10 ! kristaps 1245: dotitle(struct texi *p, enum texicmd cmd,
! 1246: const char *buf, size_t sz, size_t *pos)
! 1247: {
! 1248: size_t start, end;
! 1249:
! 1250: while (*pos < sz && isws(buf[*pos]))
! 1251: advance(p, buf, pos);
! 1252: start = end = *pos;
! 1253: while (end < sz && '\n' != buf[end])
! 1254: end++;
! 1255: free(p->subtitle);
! 1256: p->subtitle = malloc(end - start + 1);
! 1257: memcpy(p->subtitle, &buf[start], end - start);
! 1258: p->subtitle[end - start] = '\0';
! 1259: }
! 1260:
! 1261: static void
1.1 kristaps 1262: dosymbol(struct texi *p, enum texicmd cmd,
1263: const char *buf, size_t sz, size_t *pos)
1264: {
1265:
1.3 kristaps 1266: if (p->seenws && p->outcol && 0 == p->literal) {
1267: texiputchar(p, ' ');
1268: p->seenws = 0;
1269: }
1270:
1.1 kristaps 1271: switch (cmd) {
1.3 kristaps 1272: case (TEXICMD_ASTERISK):
1273: case (TEXICMD_NEWLINE):
1274: case (TEXICMD_SPACE):
1275: case (TEXICMD_TAB):
1276: texiputchar(p, ' ');
1277: break;
1.1 kristaps 1278: case (TEXICMD_AT):
1.3 kristaps 1279: texiputchar(p, '@');
1280: break;
1281: case (TEXICMD_BANG):
1282: texiputchar(p, '!');
1.7 kristaps 1283: break;
1284: case (TEXICMD_BULLET):
1285: texiputchars(p, "\\(bu");
1.1 kristaps 1286: break;
1287: case (TEXICMD_COPYRIGHT):
1288: texiputchars(p, "\\(co");
1289: break;
1.2 kristaps 1290: case (TEXICMD_DOTS):
1291: texiputchars(p, "...");
1292: break;
1.1 kristaps 1293: case (TEXICMD_LATEX):
1294: texiputchars(p, "LaTeX");
1295: break;
1.3 kristaps 1296: case (TEXICMD_QUESTIONMARK):
1297: texiputchar(p, '?');
1298: break;
1299: case (TEXICMD_SQUIGGLE_LEFT):
1300: texiputchars(p, "{");
1301: break;
1302: case (TEXICMD_SQUIGGLE_RIGHT):
1303: texiputchars(p, "}");
1304: break;
1.1 kristaps 1305: case (TEXICMD_TEXSYM):
1306: texiputchars(p, "TeX");
1307: break;
1.3 kristaps 1308: case (TEXICMD_COLON):
1309: case (TEXICMD_HYPHEN):
1310: break;
1.1 kristaps 1311: default:
1.5 kristaps 1312: texiwarn(p, "sym: %d", cmd);
1.1 kristaps 1313: abort();
1314: }
1315:
1.5 kristaps 1316: if (texitoks[cmd].len > 1)
1317: doignbracket(p, cmd, buf, sz, pos);
1.1 kristaps 1318: }
1319:
1320: static void
1321: doquotation(struct texi *p, enum texicmd cmd,
1322: const char *buf, size_t sz, size_t *pos)
1323: {
1324:
1.5 kristaps 1325: teximacro(p, "Qo");
1.1 kristaps 1326: parseto(p, buf, sz, pos, "quotation");
1.5 kristaps 1327: teximacro(p, "Qc");
1.1 kristaps 1328: }
1329:
1.3 kristaps 1330: static void
1331: domath(struct texi *p, enum texicmd cmd,
1332: const char *buf, size_t sz, size_t *pos)
1333: {
1334: size_t nest;
1335:
1336: /*
1337: * Math handling is different from everything else.
1338: * We don't allow any subcomponents, and we ignore the rules in
1339: * terms of @-commands.
1340: * This departs from GNU's rules, but whatever.
1341: */
1342: while (*pos < sz && isws(buf[*pos]))
1343: advance(p, buf, pos);
1344: if (*pos == sz || '{' != buf[*pos])
1345: return;
1346: advance(p, buf, pos);
1347: if (p->seenws && p->outcol && 0 == p->literal)
1348: texiputchar(p, ' ');
1349: p->seenws = 0;
1350: for (nest = 1; *pos < sz && nest > 0; ) {
1351: if ('{' == buf[*pos])
1352: nest++;
1353: else if ('}' == buf[*pos])
1354: if (0 == --nest)
1355: continue;
1356: texiputchar(p, buf[*pos]);
1357: advance(p, buf, pos);
1358: }
1359: if (*pos == sz)
1360: return;
1361: assert('}' == buf[*pos]);
1362: advance(p, buf, pos);
1363: }
1364:
1.1 kristaps 1365: static void
1.8 kristaps 1366: dolink(struct texi *p, enum texicmd cmd,
1.1 kristaps 1367: const char *buf, size_t sz, size_t *pos)
1368: {
1.8 kristaps 1369: int c;
1.1 kristaps 1370:
1371: switch (cmd) {
1372: case (TEXICMD_EMAIL):
1.5 kristaps 1373: teximacroopen(p, "Mt");
1.1 kristaps 1374: break;
1.3 kristaps 1375: case (TEXICMD_UREF):
1.1 kristaps 1376: case (TEXICMD_URL):
1.5 kristaps 1377: teximacroopen(p, "Lk");
1.1 kristaps 1378: break;
1.8 kristaps 1379: case (TEXICMD_XREF):
1380: texiputchars(p, "See Section");
1381: teximacroopen(p, "Qq");
1382: break;
1383: case (TEXICMD_PXREF):
1384: texiputchars(p, "see Section");
1385: teximacroopen(p, "Qq");
1386: break;
1.1 kristaps 1387: default:
1.8 kristaps 1388: abort();
1.1 kristaps 1389: }
1.8 kristaps 1390:
1391: c = parsearg(p, buf, sz, pos, 0);
1392: p->ign++;
1393: while (c > 0)
1394: c = parsearg(p, buf, sz, pos, 1);
1395: p->ign--;
1396:
1.1 kristaps 1397: if (*pos < sz - 1 &&
1398: ismpunct(buf[*pos]) &&
1.9 kristaps 1399: ismspace(buf[*pos + 1])) {
1.1 kristaps 1400: texiputchar(p, ' ');
1401: texiputchar(p, buf[*pos]);
1402: advance(p, buf, pos);
1403: }
1.8 kristaps 1404:
1405: teximacroclose(p);
1406: }
1407:
1408: static void
1409: doignargn(struct texi *p, enum texicmd cmd,
1410: const char *buf, size_t sz, size_t *pos)
1411: {
1412: int c;
1413:
1414: c = parsearg(p, buf, sz, pos, 0);
1415: p->ign++;
1416: while (c > 0)
1417: c = parsearg(p, buf, sz, pos, 1);
1418: p->ign--;
1.1 kristaps 1419: }
1420:
1421: static void
1422: dosubsection(struct texi *p, enum texicmd cmd,
1423: const char *buf, size_t sz, size_t *pos)
1424: {
1425:
1.5 kristaps 1426: teximacro(p, "Pp");
1427: teximacroopen(p, "Em");
1.3 kristaps 1428: parseeoln(p, buf, sz, pos);
1.5 kristaps 1429: teximacroclose(p);
1430: teximacro(p, "Pp");
1.1 kristaps 1431: }
1432:
1433: static void
1434: dosection(struct texi *p, enum texicmd cmd,
1435: const char *buf, size_t sz, size_t *pos)
1436: {
1437:
1.3 kristaps 1438: if (p->outmacro)
1439: texierr(p, "subsection in open line scope!?");
1440: else if (p->literal)
1441: texierr(p, "subsection in a literal scope!?");
1442:
1.5 kristaps 1443: teximacroopen(p, "Ss");
1.3 kristaps 1444: parseeoln(p, buf, sz, pos);
1445: teximacroclose(p);
1446: }
1447:
1448: static void
1449: dosp(struct texi *p, enum texicmd cmd,
1450: const char *buf, size_t sz, size_t *pos)
1451: {
1452:
1.5 kristaps 1453: teximacro(p, "Pp");
1.3 kristaps 1454: advanceeoln(p, buf, sz, pos, 1);
1.1 kristaps 1455: }
1456:
1457: static void
1.3 kristaps 1458: dochapter(struct texi *p, enum texicmd cmd,
1.1 kristaps 1459: const char *buf, size_t sz, size_t *pos)
1460: {
1461:
1.3 kristaps 1462: if (p->outmacro)
1463: texierr(p, "section in open line scope!?");
1464: else if (p->literal)
1465: texierr(p, "section in a literal scope!?");
1466:
1.5 kristaps 1467: teximacroopen(p, "Sh");
1.3 kristaps 1468: parseeoln(p, buf, sz, pos);
1469: teximacroclose(p);
1.1 kristaps 1470: }
1471:
1472: static void
1473: dotop(struct texi *p, enum texicmd cmd,
1474: const char *buf, size_t sz, size_t *pos)
1475: {
1.10 ! kristaps 1476: const char *cp;
! 1477: time_t t;
! 1478: char date[32];
! 1479:
! 1480: t = time(NULL);
! 1481: strftime(date, sizeof(date), "%F", localtime(&t));
1.1 kristaps 1482:
1.3 kristaps 1483: p->ign--;
1484: advanceeoln(p, buf, sz, pos, 1);
1.10 ! kristaps 1485: teximacroopen(p, "Dd");
! 1486: texiputchars(p, date);
! 1487: teximacroclose(p);
! 1488: teximacroopen(p, "Dt");
! 1489: for (cp = p->title; '\0' != *cp; cp++)
! 1490: texiputchar(p, toupper(*cp));
! 1491: teximacroclose(p);
1.5 kristaps 1492: teximacro(p, "Os");
1493: teximacro(p, "Sh NAME");
1.10 ! kristaps 1494: teximacroopen(p, "Nm");
! 1495: texiputchars(p, p->title);
! 1496: teximacroclose(p);
! 1497: teximacroopen(p, "Nd");
! 1498: texiputchars(p, NULL != p->subtitle ?
! 1499: p->subtitle : "Unknown description");
! 1500: teximacroclose(p);
1.1 kristaps 1501: }
1502:
1503: static void
1504: doitem(struct texi *p, enum texicmd cmd,
1505: const char *buf, size_t sz, size_t *pos)
1506: {
1507:
1.3 kristaps 1508: if (p->outmacro)
1509: texierr(p, "item in open line scope!?");
1510: else if (p->literal)
1511: texierr(p, "item in a literal scope!?");
1512:
1513: switch (p->list) {
1514: case (TEXILIST_ITEM):
1.5 kristaps 1515: teximacroopen(p, "It");
1.3 kristaps 1516: break;
1517: case (TEXILIST_NOITEM):
1.5 kristaps 1518: teximacro(p, "It");
1.3 kristaps 1519: break;
1520: default:
1.5 kristaps 1521: teximacro(p, "Pp");
1.3 kristaps 1522: break;
1523: }
1524:
1525: parseeoln(p, buf, sz, pos);
1.1 kristaps 1526:
1.3 kristaps 1527: if (TEXILIST_ITEM == p->list)
1528: teximacroclose(p);
1.9 kristaps 1529: else if (p->outcol > 0)
1.1 kristaps 1530: texiputchar(p, '\n');
1531: }
1532:
1533: static void
1534: dotable(struct texi *p, enum texicmd cmd,
1535: const char *buf, size_t sz, size_t *pos)
1536: {
1.3 kristaps 1537: enum texilist sv = p->list;
1538:
1539: p->list = TEXILIST_ITEM;
1.5 kristaps 1540: teximacro(p, "Bl -tag -width Ds");
1.1 kristaps 1541: parseto(p, buf, sz, pos, "table");
1.5 kristaps 1542: teximacro(p, "El");
1.3 kristaps 1543: p->list = sv;
1.1 kristaps 1544: }
1545:
1546: static void
1.2 kristaps 1547: doenumerate(struct texi *p, enum texicmd cmd,
1548: const char *buf, size_t sz, size_t *pos)
1549: {
1.3 kristaps 1550: enum texilist sv = p->list;
1.2 kristaps 1551:
1.3 kristaps 1552: p->list = TEXILIST_NOITEM;
1.5 kristaps 1553: teximacro(p, "Bl -enum");
1.2 kristaps 1554: parseto(p, buf, sz, pos, "enumerate");
1.5 kristaps 1555: teximacro(p, "El");
1.3 kristaps 1556: p->list = sv;
1.2 kristaps 1557: }
1558:
1559: static void
1.1 kristaps 1560: doitemize(struct texi *p, enum texicmd cmd,
1561: const char *buf, size_t sz, size_t *pos)
1562: {
1.3 kristaps 1563: enum texilist sv = p->list;
1.1 kristaps 1564:
1.3 kristaps 1565: p->list = TEXILIST_ITEM;
1.5 kristaps 1566: teximacro(p, "Bl -bullet");
1.1 kristaps 1567: parseto(p, buf, sz, pos, "itemize");
1.5 kristaps 1568: teximacro(p, "El");
1.3 kristaps 1569: p->list = sv;
1.1 kristaps 1570: }
1571:
1572: static void
1573: doignbracket(struct texi *p, enum texicmd cmd,
1574: const char *buf, size_t sz, size_t *pos)
1575: {
1576:
1.3 kristaps 1577: p->ign++;
1.1 kristaps 1578: parsebracket(p, buf, sz, pos);
1.3 kristaps 1579: p->ign--;
1.1 kristaps 1580: }
1581:
1582: static void
1583: doignline(struct texi *p, enum texicmd cmd,
1584: const char *buf, size_t sz, size_t *pos)
1585: {
1586:
1.3 kristaps 1587: advanceeoln(p, buf, sz, pos, 1);
1.1 kristaps 1588: }
1589:
/*
 * Build the array of directories to search.
 * The first entry is always a copy of "base"; if "cp" is not NULL, it
 * is split at each colon and a copy of every field, empty ones
 * included, is appended in order.
 * The number of entries is stored in "sz".
 * Both the array and its strings are allocated; running out of memory
 * ends the process.
 * This does NOT sanitise the directories!
 */
static char **
parsedirs(const char *base, const char *cp, size_t *sz)
{
	char		**list;
	char		 *copy, *rest, *piece;
	const char	 *scan;
	size_t		  n;

	/* One slot for "base", then one per field of "cp". */
	*sz = 1;
	if (cp != NULL) {
		*sz = 2;
		scan = strchr(cp, ':');
		while (scan != NULL) {
			(*sz)++;
			scan = strchr(scan + 1, ':');
		}
	}

	list = calloc(*sz, sizeof(char *));
	if (list == NULL) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	list[0] = strdup(base);
	if (list[0] == NULL) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	if (cp == NULL)
		return(list);

	/* strsep() writes to its input, so split a private copy. */
	copy = strdup(cp);
	if (copy == NULL) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	rest = copy;
	n = 1;
	while ((piece = strsep(&rest, ":")) != NULL) {
		list[n] = strdup(piece);
		if (list[n] == NULL) {
			perror(NULL);
			exit(EXIT_FAILURE);
		}
		n++;
	}

	free(copy);
	return(list);
}
1635:
1.1 kristaps 1636: int
1637: main(int argc, char *argv[])
1638: {
1639: struct texi texi;
1.2 kristaps 1640: int c;
1641: char *path, *dir;
1.10 ! kristaps 1642: const char *progname, *Idir, *cp;
1.1 kristaps 1643:
1644: progname = strrchr(argv[0], '/');
1645: if (progname == NULL)
1646: progname = argv[0];
1647: else
1648: ++progname;
1649:
1.10 ! kristaps 1650: memset(&texi, 0, sizeof(struct texi));
1.5 kristaps 1651: Idir = NULL;
1.10 ! kristaps 1652:
1.5 kristaps 1653: while (-1 != (c = getopt(argc, argv, "I:")))
1.1 kristaps 1654: switch (c) {
1.5 kristaps 1655: case ('I'):
1656: Idir = optarg;
1657: break;
1.1 kristaps 1658: default:
1659: goto usage;
1660: }
1661:
1662: argv += optind;
1663: if (0 == (argc -= optind))
1664: goto usage;
1665:
1.2 kristaps 1666: if (NULL == (path = strdup(argv[0]))) {
1667: perror(NULL);
1668: exit(EXIT_FAILURE);
1669: } else if (NULL == (dir = dirname(path))) {
1670: perror(argv[0]);
1671: free(path);
1672: exit(EXIT_FAILURE);
1673: }
1674: free(path);
1675:
1.10 ! kristaps 1676: if (NULL != (cp = strrchr(argv[0], '/')))
! 1677: texi.title = strdup(cp + 1);
! 1678: else
! 1679: texi.title = strdup(argv[0]);
! 1680:
! 1681: if (NULL == texi.title) {
! 1682: perror(NULL);
! 1683: exit(EXIT_FAILURE);
! 1684: } else if (NULL != (path = strchr(texi.title, '.')))
! 1685: *path = '\0';
! 1686:
1.3 kristaps 1687: texi.ign = 1;
1.5 kristaps 1688: texi.dirs = parsedirs(dir, Idir, &texi.dirsz);
1.2 kristaps 1689: parsefile(&texi, argv[0]);
1.5 kristaps 1690: /* We shouldn't get here. */
1.2 kristaps 1691: texiexit(&texi);
1692: return(EXIT_FAILURE);
1.1 kristaps 1693: usage:
1.8 kristaps 1694: fprintf(stderr, "usage: %s [-Idirs] file\n", progname);
1.1 kristaps 1695: return(EXIT_FAILURE);
1696: }
CVSweb