Annotation of texi2mdoc/main.c, Revision 1.11
1.11 ! kristaps 1: /* $Id: main.c,v 1.10 2015/02/18 15:30:31 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2015 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/mman.h>
18: #include <sys/stat.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <getopt.h>
1.2 kristaps 24: #include <libgen.h>
25: #include <limits.h>
1.1 kristaps 26: #include <stdarg.h>
27: #include <stdio.h>
28: #include <stdlib.h>
29: #include <string.h>
1.10 kristaps 30: #include <time.h>
1.6 kristaps 31: #include <unistd.h>
1.1 kristaps 32:
/*
 * One identifier per native Texinfo command that we recognise.
 * Names that a document overrides for itself are handled elsewhere.
 * The values index the texitoks[] table, so both must list the commands
 * in the same order; TEXICMD__MAX is the number of commands and is also
 * what a failed lookup returns.
 */
enum texicmd {
	TEXICMD_ACRONYM = 0,
	TEXICMD_A4PAPER,		/* @afourpaper */
	TEXICMD_ANCHOR,
	TEXICMD_APPENDIX,
	TEXICMD_APPENDIXSEC,
	TEXICMD_ASTERISK,		/* @* */
	TEXICMD_AT,			/* @@ */
	TEXICMD_AUTHOR,
	TEXICMD_BANG,			/* @! */
	TEXICMD_BULLET,
	TEXICMD_BYE,
	TEXICMD_CENTER,
	TEXICMD_CHAPTER,
	TEXICMD_CINDEX,
	TEXICMD_CITE,
	TEXICMD_CODE,
	TEXICMD_COLON,			/* @: */
	TEXICMD_COMMAND,
	TEXICMD_COMMENT,		/* @c */
	TEXICMD_COMMENT_LONG,		/* @comment */
	TEXICMD_CONTENTS,
	TEXICMD_COPYING,
	TEXICMD_COPYRIGHT,
	TEXICMD_DEFTYPEFN,
	TEXICMD_DEFTYPEFNX,
	TEXICMD_DEFTYPEFUN,
	TEXICMD_DEFTYPEFUNX,
	TEXICMD_DEFTYPEVAR,
	TEXICMD_DEFTYPEVR,
	TEXICMD_DETAILMENU,
	TEXICMD_DFN,
	TEXICMD_DIRCATEGORY,
	TEXICMD_DIRENTRY,
	TEXICMD_DISPLAY,
	TEXICMD_DOTS,
	TEXICMD_EMAIL,
	TEXICMD_EMPH,
	TEXICMD_END,
	TEXICMD_ENUMERATE,
	TEXICMD_ENV,
	TEXICMD_EXAMPLE,
	TEXICMD_FILE,
	TEXICMD_GROUP,
	TEXICMD_HEADING,
	TEXICMD_HEADINGS,
	TEXICMD_HYPHEN,			/* @- */
	TEXICMD_I,
	TEXICMD_IFCLEAR,
	TEXICMD_IFHTML,
	TEXICMD_IFINFO,
	TEXICMD_IFNOTTEX,
	TEXICMD_IFTEX,
	TEXICMD_IFSET,
	TEXICMD_IMAGE,
	TEXICMD_INCLUDE,
	TEXICMD_INSERTCOPYING,
	TEXICMD_ITEM,
	TEXICMD_ITEMIZE,
	TEXICMD_KBD,
	TEXICMD_LATEX,			/* @LaTeX */
	TEXICMD_MATH,
	TEXICMD_MENU,
	TEXICMD_NEWLINE,		/* @ followed by a newline */
	TEXICMD_NODE,
	TEXICMD_NOINDENT,
	TEXICMD_PXREF,
	TEXICMD_QUESTIONMARK,		/* @? */
	TEXICMD_QUOTATION,
	TEXICMD_PAGE,
	TEXICMD_PARINDENT,		/* @paragraphindent */
	TEXICMD_PRINTINDEX,
	TEXICMD_REF,
	TEXICMD_SAMP,
	TEXICMD_SC,
	TEXICMD_SECTION,
	TEXICMD_SET,
	TEXICMD_SETCHAPNEWPAGE,		/* @setchapternewpage */
	TEXICMD_SETFILENAME,
	TEXICMD_SETTITLE,
	TEXICMD_SP,
	TEXICMD_SPACE,			/* @ followed by a blank */
	TEXICMD_SMALLEXAMPLE,
	TEXICMD_SQUIGGLE_LEFT,		/* @{ */
	TEXICMD_SQUIGGLE_RIGHT,		/* @} */
	TEXICMD_STRONG,
	TEXICMD_SUBSECTION,
	TEXICMD_SUBTITLE,
	TEXICMD_TAB,			/* @ followed by a tab */
	TEXICMD_TABLE,
	TEXICMD_TEX,			/* @tex (the block) */
	TEXICMD_TEXSYM,			/* @TeX (the symbol) */
	TEXICMD_TITLE,
	TEXICMD_TITLEFONT,
	TEXICMD_TITLEPAGE,
	TEXICMD_TOP,
	TEXICMD_UNNUMBERED,
	TEXICMD_UNNUMBEREDSEC,
	TEXICMD_UREF,
	TEXICMD_URL,
	TEXICMD_VAR,
	TEXICMD_VSKIP,
	TEXICMD_W,
	TEXICMD_XREF,
	TEXICMD__MAX
};
144:
/*
 * An input file that is being parsed, together with our position in it.
 * The position is what diagnostics report (converted to one-based).
 */
struct texifile {
	const char	*name;	/* file name, as printed in diagnostics */
	size_t		 line;	/* zero-based current line */
	size_t		 col;	/* zero-based column within that line */
	char		*map;	/* start of the mmap'd file contents */
	size_t		 mapsz;	/* length of the mapping in bytes */
};
156:
struct texi;

/*
 * Signature shared by all functions that implement a Texinfo command.
 * The parsers invoke a handler with the parse state, the command that
 * was looked up, the input buffer and its size, and a pointer to the
 * current position in that buffer (just past the command name), which
 * the handler may advance.
 */
typedef void (*texicmdfp)(struct texi *p, enum texicmd cmd,
	const char *buf, size_t sz, size_t *pos);
164:
165: /*
166: * Describes Texinfo commands, whether native or overriden.
167: */
168: struct texitok {
169: texicmdfp fp; /* callback (or NULL if none) */
170: const char *tok; /* name of the token */
171: size_t len; /* strlen(tok) */
172: };
173:
/*
 * Kind of list context, as stored in the "list" member of struct texi.
 * NOTE(review): presumably ITEM/NOITEM distinguish lists whose entries
 * are introduced by @item from those that are not -- confirm against
 * the list handlers.
 */
enum texilist {
	TEXILIST_NONE = 0,
	TEXILIST_ITEM = 1,
	TEXILIST_NOITEM = 2
};
179:
1.1 kristaps 180: /*
181: * The main parse structure.
182: * This keeps any necessary information handy.
183: */
184: struct texi {
1.5 kristaps 185: struct texifile files[64]; /* stack of open files */
186: size_t filepos; /* number of open files */
187: size_t outcol; /* column in output line */
188: char **dirs; /* texi directories */
189: size_t dirsz; /* number of texi directories */
1.11 ! kristaps 190: char *title; /* title of document */
! 191: char *subtitle; /* subtitle of document */
! 192: /*
! 193: * The following control what we output to the screen.
! 194: * The complexity is required to accomodate for mdoc(7).
! 195: */
1.8 kristaps 196: enum texilist list; /* current list (set recursively) */
197: int outmacro; /* if >0, output is in line macro */
198: int seenws; /* ws has been seen (and ignored) */
1.11 ! kristaps 199: int seenvs; /* newline has been Pp'd */
1.8 kristaps 200: int ign; /* if >0, don't print anything */
201: int literal; /* if >0, literal context */
1.1 kristaps 202: };
203:
/* Blanks and tabs are the white-space that Texinfo disregards. */
#define	isws(_x) \
	((_x) == ' ' || (_x) == '\t')
/* Like isws(), except that a newline counts as white-space as well. */
#define	ismspace(_x) \
	(isws((_x)) || (_x) == '\n')
1.1 kristaps 209:
/*
 * Command handlers referenced by the texitoks[] table.
 * All of them share the texicmdfp signature.
 */
static void	doblock(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dobracket(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dobye(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dochapter(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	docommand(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dodeftypefun(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dodeftypevar(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dodisplay(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doemph(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doenumerate(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doenv(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doexample(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dofile(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doignargn(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doignblock(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doignbracket(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doignline(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doinclude(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doitalic(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doitem(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doitemize(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dolink(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doliteral(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	domath(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	doquotation(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dotable(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dotop(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dosection(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dosp(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dosubsection(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dosymbol(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
static void	dotitle(struct texi *p, enum texicmd cmd,
			const char *buf, size_t sz, size_t *pos);
1.1 kristaps 242:
243: static const struct texitok texitoks[TEXICMD__MAX] = {
1.8 kristaps 244: { doignargn, "acronym", 7 }, /* TEXICMD_ACRONYM */
1.1 kristaps 245: { doignline, "afourpaper", 10 }, /* TEXICMD_A4PAPER */
246: { doignbracket, "anchor", 6 }, /* TEXICMD_ANCHOR */
1.3 kristaps 247: { dochapter, "appendix", 8 }, /* TEXICMD_APPENDIX */
248: { dochapter, "appendixsec", 11 }, /* TEXICMD_APPENDIXSEC */
249: { dosymbol, "*", 1 }, /* TEXICMD_ASTERISK */
1.1 kristaps 250: { dosymbol, "@", 1 }, /* TEXICMD_AT */
1.3 kristaps 251: { doignline, "author", 6 }, /* TEXICMD_AUTHOR */
252: { dosymbol, "!", 1 }, /* TEXICMD_BANG */
1.7 kristaps 253: { dosymbol, "bullet", 6 }, /* TEXICMD_BULLET */
1.1 kristaps 254: { dobye, "bye", 3 }, /* TEXICMD_BYE */
1.5 kristaps 255: { doignline, "center", 5 }, /* TEXICMD_CENTER */
1.3 kristaps 256: { dochapter, "chapter", 7 }, /* TEXICMD_CHAPTER */
1.1 kristaps 257: { doignline, "cindex", 6 }, /* TEXICMD_CINDEX */
258: { doliteral, "code", 4 }, /* TEXICMD_CODE */
1.3 kristaps 259: { doitalic, "cite", 4 }, /* TEXICMD_CITE */
260: { dosymbol, ":", 1 }, /* TEXICMD_COLON */
1.1 kristaps 261: { docommand, "command", 7 }, /* TEXICMD_COMMAND */
262: { doignline, "c", 1 }, /* TEXICMD_COMMENT */
1.2 kristaps 263: { doignline, "comment", 7 }, /* TEXICMD_COMMENT_LONG */
1.1 kristaps 264: { doignline, "contents", 8 }, /* TEXICMD_CONTENTS */
265: { doignblock, "copying", 7 }, /* TEXICMD_COPYING */
266: { dosymbol, "copyright", 9 }, /* TEXICMD_COPYRIGHT */
1.3 kristaps 267: { dodeftypefun, "deftypefn", 9 }, /* TEXICMD_DEFTYPEFN */
268: { dodeftypefun, "deftypefnx", 10 }, /* TEXICMD_DEFTYPEFNX */
269: { dodeftypefun, "deftypefun", 10 }, /* TEXICMD_DEFTYPEFUN */
270: { dodeftypefun, "deftypefunx", 11 }, /* TEXICMD_DEFTYPEFUNX */
271: { dodeftypevar, "deftypevar", 10 }, /* TEXICMD_DEFTYPEVAR */
272: { dodeftypevar, "deftypevr", 9 }, /* TEXICMD_DEFTYPEVR */
1.1 kristaps 273: { doignblock, "detailmenu", 10 }, /* TEXICMD_DETAILMENU */
1.3 kristaps 274: { doitalic, "dfn", 3 }, /* TEXICMD_DFN */
1.1 kristaps 275: { doignline, "dircategory", 11 }, /* TEXICMD_DIRCATEGORY */
276: { doignblock, "direntry", 8 }, /* TEXICMD_DIRENTRY */
1.3 kristaps 277: { dodisplay, "display", 7 }, /* TEXICMD_DISPLAY */
1.2 kristaps 278: { dosymbol, "dots", 4 }, /* TEXICMD_DOTS */
1.8 kristaps 279: { dolink, "email", 5 }, /* TEXICMD_EMAIL */
1.1 kristaps 280: { doemph, "emph", 4 }, /* TEXICMD_EMPH */
281: { NULL, "end", 3 }, /* TEXICMD_END */
1.2 kristaps 282: { doenumerate, "enumerate", 9 }, /* TEXICMD_ENUMERATE */
1.3 kristaps 283: { doenv, "env", 3 }, /* TEXICMD_ENV */
1.1 kristaps 284: { doexample, "example", 7 }, /* TEXICMD_EXAMPLE */
285: { dofile, "file", 4 }, /* TEXICMD_FILE */
1.3 kristaps 286: { doblock, "group", 5 }, /* TEXICMD_GROUP */
1.2 kristaps 287: { dosection, "heading", 7 }, /* TEXICMD_HEADING */
1.3 kristaps 288: { doignline, "headings", 8 }, /* TEXICMD_HEADINGS */
289: { dosymbol, "-", 1 }, /* TEXICMD_HYPHEN */
1.1 kristaps 290: { doitalic, "i", 1 }, /* TEXICMD_I */
1.3 kristaps 291: { doignblock, "ifclear", 7 }, /* TEXICMD_IFCLEAR */
1.1 kristaps 292: { doignblock, "ifhtml", 6 }, /* TEXICMD_IFHTML */
1.3 kristaps 293: { doignblock, "ifinfo", 6 }, /* TEXICMD_IFINFO */
294: { doblock, "ifnottex", 8 }, /* TEXICMD_IFNOTTEX */
1.1 kristaps 295: { doignblock, "iftex", 5 }, /* TEXICMD_IFTEX */
1.3 kristaps 296: { doignblock, "ifset", 5 }, /* TEXICMD_IFSET */
1.1 kristaps 297: { doignbracket, "image", 5 }, /* TEXICMD_IMAGE */
1.2 kristaps 298: { doinclude, "include", 7 }, /* TEXICMD_INCLUDE */
1.5 kristaps 299: { doignline, "insertcopying", 13 }, /* TEXICMD_INSERTCOPYING */
1.1 kristaps 300: { doitem, "item", 4 }, /* TEXICMD_ITEM */
301: { doitemize, "itemize", 7 }, /* TEXICMD_ITEMIZE */
302: { doliteral, "kbd", 3 }, /* TEXICMD_KBD */
303: { dosymbol, "LaTeX", 5 }, /* TEXICMD_LATEX */
1.3 kristaps 304: { domath, "math", 4 }, /* TEXICMD_MATH */
1.1 kristaps 305: { doignblock, "menu", 4 }, /* TEXICMD_MENU */
1.3 kristaps 306: { dosymbol, "\n", 1 }, /* TEXICMD_NEWLINE */
1.1 kristaps 307: { doignline, "node", 4 }, /* TEXICMD_NODE */
1.3 kristaps 308: { doignline, "noindent", 8 }, /* TEXICMD_NOINDENT */
1.8 kristaps 309: { dolink, "pxref", 5 }, /* TEXICMD_PXREF */
1.3 kristaps 310: { dosymbol, "?", 1 }, /* TEXICMD_QUESTIONMARK */
1.1 kristaps 311: { doquotation, "quotation", 9 }, /* TEXICMD_QUOTATION */
1.3 kristaps 312: { doignline, "page", 4 }, /* TEXICMD_PAGE */
313: { doignline, "paragraphindent", 14 }, /* TEXICMD_PARINDENT */
1.2 kristaps 314: { doignline, "printindex", 10 }, /* TEXICMD_PRINTINDEX */
1.1 kristaps 315: { dobracket, "ref", 3 }, /* TEXICMD_REF */
316: { doliteral, "samp", 4 }, /* TEXICMD_SAMP */
1.7 kristaps 317: { dobracket, "sc", 2 }, /* TEXICMD_SC */
1.1 kristaps 318: { dosection, "section", 7 }, /* TEXICMD_SECTION */
1.3 kristaps 319: { doignline, "set", 3 }, /* TEXICMD_SET */
1.1 kristaps 320: { doignline, "setchapternewpage", 17 }, /* TEXICMD_SETCHAPNEWPAGE */
321: { doignline, "setfilename", 11 }, /* TEXICMD_SETFILENAME */
1.10 kristaps 322: { dotitle, "settitle", 8 }, /* TEXICMD_SETTITLE */
1.3 kristaps 323: { dosp, "sp", 2 }, /* TEXICMD_SP */
324: { dosymbol, " ", 1 }, /* TEXICMD_SPACE */
325: { doexample, "smallexample", 12 }, /* TEXICMD_SMALLEXAMPLE */
326: { dosymbol, "{", 1 }, /* TEXICMD_SQUIGGLE_LEFT */
327: { dosymbol, "}", 1 }, /* TEXICMD_SQUIGGLE_RIGHT */
1.8 kristaps 328: { doemph, "strong", 6 }, /* TEXICMD_STRONG */
1.1 kristaps 329: { dosubsection, "subsection", 10 }, /* TEXICMD_SUBSECTION */
1.3 kristaps 330: { doignline, "subtitle", 8 }, /* TEXICMD_SUBTITLE */
331: { dosymbol, "\t", 1 }, /* TEXICMD_TAB */
1.1 kristaps 332: { dotable, "table", 5 }, /* TEXICMD_TABLE */
333: { doignblock, "tex", 3 }, /* TEXICMD_TEX */
334: { dosymbol, "TeX", 3 }, /* TEXICMD_TEXSYM */
1.3 kristaps 335: { doignline, "title", 5 }, /* TEXICMD_TITLE */
1.1 kristaps 336: { dobracket, "titlefont", 9 }, /* TEXICMD_TITLEFONT */
337: { doignblock, "titlepage", 9 }, /* TEXICMD_TITLEPAGE */
338: { dotop, "top", 3 }, /* TEXICMD_TOP */
1.3 kristaps 339: { dochapter, "unnumbered", 10 }, /* TEXICMD_UNNUMBERED */
1.2 kristaps 340: { dosection, "unnumberedsec", 13 }, /* TEXICMD_UNNUMBEREDSEC */
1.8 kristaps 341: { dolink, "uref", 4 }, /* TEXICMD_UREF */
342: { dolink, "url", 3 }, /* TEXICMD_URL */
1.1 kristaps 343: { doliteral, "var", 3 }, /* TEXICMD_VAR */
1.9 kristaps 344: { dosp, "vskip", 5 }, /* TEXICMD_VSKIP */
1.3 kristaps 345: { dobracket, "w", 1 }, /* TEXICMD_W */
1.8 kristaps 346: { dolink, "xref", 4 }, /* TEXICMD_XREF */
1.1 kristaps 347: };
348:
1.2 kristaps 349: /*
350: * Unmap the top-most file that we're using.
351: */
1.1 kristaps 352: static void
353: texifilepop(struct texi *p)
354: {
355: struct texifile *f;
356:
357: assert(p->filepos > 0);
358: f = &p->files[--p->filepos];
359: munmap(f->map, f->mapsz);
360: }
361:
1.2 kristaps 362: /*
1.8 kristaps 363: * Unmap all files that we're currently using and free all resources
364: * that we've allocated during the parse.
1.2 kristaps 365: * The utility should exit(...) after this is called.
366: */
1.1 kristaps 367: static void
368: texiexit(struct texi *p)
369: {
1.5 kristaps 370: size_t i;
371:
372: if (p->outcol)
373: putchar('\n');
1.1 kristaps 374:
375: while (p->filepos > 0)
376: texifilepop(p);
1.5 kristaps 377:
378: for (i = 0; i < p->dirsz; i++)
379: free(p->dirs[i]);
1.10 kristaps 380:
1.5 kristaps 381: free(p->dirs);
1.10 kristaps 382: free(p->subtitle);
383: free(p->title);
1.1 kristaps 384: }
385:
/*
 * Die because of a failed system or library call.
 * "errstring" is handed to perror(3), after which all parse resources
 * are released and the process exits with EXIT_FAILURE.
 */
static void
texiabort(struct texi *p, const char *errstring)
{
	/* Report before cleaning up: the cleanup may change errno. */
	perror(errstring);
	texiexit(p);
	exit(EXIT_FAILURE);
}
398:
399: /*
400: * Print a generic warning message (to stderr) tied to our current
401: * location in the parse sequence.
402: */
403: static void
404: texiwarn(const struct texi *p, const char *fmt, ...)
405: {
406: va_list ap;
407:
1.2 kristaps 408: fprintf(stderr, "%s:%zu:%zu: warning: ",
1.1 kristaps 409: p->files[p->filepos - 1].name,
410: p->files[p->filepos - 1].line + 1,
411: p->files[p->filepos - 1].col + 1);
412: va_start(ap, fmt);
413: vfprintf(stderr, fmt, ap);
414: va_end(ap);
415: fputc('\n', stderr);
416: }
417:
1.8 kristaps 418: /*
419: * Print an error message (to stderr) tied to our current location in
420: * the parse sequence, invoke texiexit(), then die.
421: */
1.2 kristaps 422: static void
423: texierr(struct texi *p, const char *fmt, ...)
424: {
425: va_list ap;
426:
427: fprintf(stderr, "%s:%zu:%zu: error: ",
428: p->files[p->filepos - 1].name,
429: p->files[p->filepos - 1].line + 1,
430: p->files[p->filepos - 1].col + 1);
431: va_start(ap, fmt);
432: vfprintf(stderr, fmt, ap);
433: va_end(ap);
434: fputc('\n', stderr);
435: texiexit(p);
436: exit(EXIT_FAILURE);
437: }
438:
1.1 kristaps 439: /*
1.8 kristaps 440: * Put a single data character to the output if we're not ignoring.
441: * Adjusts our output status.
1.1 kristaps 442: */
443: static void
444: texiputchar(struct texi *p, char c)
445: {
446:
1.3 kristaps 447: if (p->ign)
1.1 kristaps 448: return;
449: putchar(c);
1.11 ! kristaps 450: p->seenvs = 0;
1.1 kristaps 451: if ('\n' == c) {
452: p->outcol = 0;
453: p->seenws = 0;
454: } else
455: p->outcol++;
456: }
457:
/*
 * Write a NUL-terminated string by passing each of its characters, in
 * order, to texiputchar().
 */
static void
texiputchars(struct texi *p, const char *s)
{
	const char	*cp;

	for (cp = s; *cp != '\0'; cp++)
		texiputchar(p, *cp);
}
468:
469: /*
1.8 kristaps 470: * Close an mdoc(7) macro opened with teximacroopen().
471: * If there are no more macros on the line, prints a newline.
1.1 kristaps 472: */
473: static void
1.3 kristaps 474: teximacroclose(struct texi *p)
475: {
476:
1.8 kristaps 477: /* FIXME: punctuation. */
1.5 kristaps 478: if (0 == --p->outmacro)
479: texiputchar(p, '\n');
1.3 kristaps 480: }
481:
482: /*
1.8 kristaps 483: * Open a mdoc(7) macro.
484: * This is used for line macros, e.g., Qq [foo bar baz].
485: * It can be invoked for nested macros, e.g., Qq Li foo .
1.3 kristaps 486: */
487: static void
488: teximacroopen(struct texi *p, const char *s)
1.1 kristaps 489: {
490:
1.5 kristaps 491: if (p->outcol && 0 == p->outmacro)
492: texiputchar(p, '\n');
493: if (0 == p->outmacro)
494: texiputchar(p, '.');
495: else
496: texiputchar(p, ' ');
497: texiputchars(p, s);
498: texiputchar(p, ' ');
1.3 kristaps 499: p->outmacro++;
1.5 kristaps 500: p->seenws = 0;
1.1 kristaps 501: }
502:
503: /*
1.8 kristaps 504: * Put a stadnalone mdoc(7) command with the trailing newline.
1.1 kristaps 505: */
506: static void
507: teximacro(struct texi *p, const char *s)
508: {
509:
1.4 kristaps 510: if (p->outmacro)
511: texierr(p, "\"%s\" in open line scope!?", s);
512: else if (p->literal)
513: texierr(p, "\"%s\" in a literal scope!?", s);
514:
1.1 kristaps 515: if (p->outcol)
516: texiputchar(p, '\n');
1.5 kristaps 517:
518: texiputchar(p, '.');
519: texiputchars(p, s);
520: texiputchar(p, '\n');
1.1 kristaps 521: }
522:
1.11 ! kristaps 523: static void
! 524: texivspace(struct texi *p)
! 525: {
! 526:
! 527: if (p->seenvs)
! 528: return;
! 529: teximacro(p, "Pp");
! 530: p->seenvs = 1;
! 531: }
! 532:
1.1 kristaps 533: /*
534: * Advance by a single byte in the input stream.
535: */
536: static void
537: advance(struct texi *p, const char *buf, size_t *pos)
538: {
539:
540: if ('\n' == buf[*pos]) {
541: p->files[p->filepos - 1].line++;
542: p->files[p->filepos - 1].col = 0;
543: } else
544: p->files[p->filepos - 1].col++;
545:
546: (*pos)++;
547: }
548:
1.11 ! kristaps 549: static void
! 550: texipunctuate(struct texi *p, const char *buf, size_t sz, size_t *pos)
! 551: {
! 552: size_t start, end;
! 553:
! 554: if (1 != p->outmacro)
! 555: return;
! 556:
! 557: for (start = end = *pos; end < sz; end++) {
! 558: switch (buf[end]) {
! 559: case (','):
! 560: case (')'):
! 561: case ('.'):
! 562: case ('"'):
! 563: case (':'):
! 564: case ('!'):
! 565: case ('?'):
! 566: continue;
! 567: default:
! 568: break;
! 569: }
! 570: break;
! 571: }
! 572: if (end == *pos)
! 573: return;
! 574: if (end + 1 == sz || ' ' == buf[end] || '\n' == buf[end]) {
! 575: for ( ; start < end; start++) {
! 576: texiputchar(p, ' ');
! 577: texiputchar(p, buf[start]);
! 578: advance(p, buf, pos);
! 579: }
! 580: }
! 581: }
! 582:
1.1 kristaps 583: /*
584: * Advance to the next non-whitespace word in the input stream.
585: * If we're in literal mode, then print all of the whitespace as we're
586: * doing so.
587: */
588: static size_t
589: advancenext(struct texi *p, const char *buf, size_t sz, size_t *pos)
590: {
591:
1.3 kristaps 592: if (p->literal) {
1.9 kristaps 593: while (*pos < sz && ismspace(buf[*pos])) {
1.5 kristaps 594: if (*pos && '\n' == buf[*pos] &&
595: '\\' == buf[*pos - 1])
596: texiputchar(p, 'e');
1.1 kristaps 597: texiputchar(p, buf[*pos]);
598: advance(p, buf, pos);
599: }
600: return(*pos);
601: }
602:
1.9 kristaps 603: while (*pos < sz && ismspace(buf[*pos])) {
1.1 kristaps 604: p->seenws = 1;
605: /*
606: * If it looks like we've printed a double-line, then
607: * output a paragraph.
608: * FIXME: this is stupid.
609: */
1.11 ! kristaps 610: if (*pos && '\n' == buf[*pos] && '\n' == buf[*pos - 1])
! 611: texivspace(p);
1.1 kristaps 612: advance(p, buf, pos);
613: }
614: return(*pos);
615: }
616:
/*
 * Skip ahead to the next newline (or to the end of the buffer) and
 * return the resulting position.
 * If "consumenl" is non-zero, the newline itself is skipped as well.
 */
static size_t
advanceeoln(struct texi *p, const char *buf,
	size_t sz, size_t *pos, int consumenl)
{

	/* FIXME: disregards @NEWLINE. */
	for ( ; *pos < sz; advance(p, buf, pos))
		if (buf[*pos] == '\n')
			break;

	if (consumenl && *pos < sz)
		advance(p, buf, pos);

	return(*pos);
}
632:
/*
 * Step through the input one byte at a time until the absolute buffer
 * position "end" is reached.
 * "end" must not lie before the current position.
 */
static void
advanceto(struct texi *p, const char *buf, size_t *pos, size_t end)
{

	assert(*pos <= end);
	for ( ; *pos < end; advance(p, buf, pos))
		continue;
}
645:
646: /*
647: * Output a free-form word in the input stream, progressing to the next
648: * command or white-space.
649: * This also will advance the input stream.
650: */
651: static void
1.8 kristaps 652: texiword(struct texi *p, const char *buf,
653: size_t sz, size_t *pos, char extra)
1.1 kristaps 654: {
655:
1.3 kristaps 656: if (0 == p->outmacro && p->outcol > 72 && 0 == p->literal)
1.1 kristaps 657: texiputchar(p, '\n');
1.8 kristaps 658: /* FIXME: abstract this: we use it elsewhere. */
1.3 kristaps 659: if (p->seenws && p->outcol && 0 == p->literal)
1.1 kristaps 660: texiputchar(p, ' ');
661:
662: p->seenws = 0;
663:
1.9 kristaps 664: while (*pos < sz && ! ismspace(buf[*pos])) {
1.1 kristaps 665: switch (buf[*pos]) {
666: case ('@'):
667: case ('}'):
668: case ('{'):
669: return;
670: }
1.8 kristaps 671: if ('\0' != extra && buf[*pos] == extra)
672: return;
1.1 kristaps 673: if (*pos < sz - 1 &&
674: '`' == buf[*pos] &&
675: '`' == buf[*pos + 1]) {
676: texiputchars(p, "\\(lq");
677: advance(p, buf, pos);
678: } else if (*pos < sz - 1 &&
679: '\'' == buf[*pos] &&
680: '\'' == buf[*pos + 1]) {
681: texiputchars(p, "\\(rq");
682: advance(p, buf, pos);
683: } else
684: texiputchar(p, buf[*pos]);
685: advance(p, buf, pos);
686: }
687: }
688:
1.8 kristaps 689: /*
690: * Look up the command at position "pos" in the buffer, returning it (or
691: * TEXICMD__MAX if none found) and setting "end" to be the absolute
692: * index after the command name.
693: */
1.1 kristaps 694: static enum texicmd
695: texicmd(struct texi *p, const char *buf,
696: size_t pos, size_t sz, size_t *end)
697: {
698: size_t i, len;
699:
700: assert('@' == buf[pos]);
1.3 kristaps 701:
1.9 kristaps 702: if ((*end = pos) == sz)
703: return(TEXICMD__MAX);
704: else if ((*end = ++pos) == sz)
1.3 kristaps 705: return(TEXICMD__MAX);
706:
707: /* Alphabetic commands are special. */
708: if ( ! isalpha(buf[pos])) {
1.9 kristaps 709: if ((*end = pos + 1) == sz)
710: return(TEXICMD__MAX);
1.3 kristaps 711: for (i = 0; i < TEXICMD__MAX; i++) {
712: if (1 != texitoks[i].len)
713: continue;
714: if (0 == strncmp(texitoks[i].tok, &buf[pos], 1))
715: return(i);
716: }
717: texiwarn(p, "bad command: @%c", buf[pos]);
718: return(TEXICMD__MAX);
719: }
720:
1.9 kristaps 721: for (*end = pos; *end < sz && ! ismspace(buf[*end]); (*end)++)
1.3 kristaps 722: if ((*end > pos && ('@' == buf[*end] ||
723: '{' == buf[*end] || '}' == buf[*end])))
1.1 kristaps 724: break;
725:
726: len = *end - pos;
727: for (i = 0; i < TEXICMD__MAX; i++) {
728: if (len != texitoks[i].len)
729: continue;
730: if (0 == strncmp(texitoks[i].tok, &buf[pos], len))
731: return(i);
732: }
733:
1.3 kristaps 734: texiwarn(p, "bad command: @%.*s", (int)len, &buf[pos]);
1.1 kristaps 735: return(TEXICMD__MAX);
736: }
737:
1.8 kristaps 738: /*
739: * Parse an argument from a bracketed command, e.g., @url{foo, baz}.
740: * Num should be set to the argument we're currently parsing, although
741: * it suffixes for it to be zero or non-zero.
742: * This will return 1 if there are more arguments, 0 otherwise.
743: * This will stop (returning 0) in the event of EOF or if we're not at a
744: * bracket for the zeroth parse.
745: */
746: static int
747: parsearg(struct texi *p, const char *buf,
748: size_t sz, size_t *pos, size_t num)
749: {
750: size_t end;
751: enum texicmd cmd;
752:
1.9 kristaps 753: while (*pos < sz && ismspace(buf[*pos]))
1.8 kristaps 754: advance(p, buf, pos);
755: if (*pos == sz || (0 == num && '{' != buf[*pos]))
756: return(0);
757: if (0 == num)
758: advance(p, buf, pos);
759:
760: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
761: switch (buf[*pos]) {
762: case (','):
763: advance(p, buf, pos);
764: return(1);
765: case ('}'):
766: advance(p, buf, pos);
767: return(0);
768: case ('{'):
769: if (0 == p->ign)
770: texiwarn(p, "unexpected \"{\"");
771: advance(p, buf, pos);
772: continue;
773: case ('@'):
774: break;
775: default:
776: texiword(p, buf, sz, pos, ',');
777: continue;
778: }
779:
780: cmd = texicmd(p, buf, *pos, sz, &end);
781: advanceto(p, buf, pos, end);
782: if (TEXICMD__MAX == cmd)
783: continue;
784: if (NULL != texitoks[cmd].fp)
785: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
786: }
787: return(0);
788: }
789:
790: /*
791: * Parse until the end of a bracketed statement, e.g., @foo{bar baz}.
792: * This will stop in the event of EOF or if we're not at a bracket.
793: */
1.1 kristaps 794: static void
795: parsebracket(struct texi *p, const char *buf, size_t sz, size_t *pos)
796: {
797: size_t end;
798: enum texicmd cmd;
799:
1.9 kristaps 800: while (*pos < sz && ismspace(buf[*pos]))
1.3 kristaps 801: advance(p, buf, pos);
802:
1.1 kristaps 803: if (*pos == sz || '{' != buf[*pos])
804: return;
805: advance(p, buf, pos);
806:
807: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
808: switch (buf[*pos]) {
809: case ('}'):
810: advance(p, buf, pos);
811: return;
812: case ('{'):
1.3 kristaps 813: if (0 == p->ign)
814: texiwarn(p, "unexpected \"{\"");
815: advance(p, buf, pos);
816: continue;
817: case ('@'):
818: break;
819: default:
1.8 kristaps 820: texiword(p, buf, sz, pos, '\0');
1.3 kristaps 821: continue;
822: }
823:
824: cmd = texicmd(p, buf, *pos, sz, &end);
825: advanceto(p, buf, pos, end);
826: if (TEXICMD__MAX == cmd)
827: continue;
828: if (NULL != texitoks[cmd].fp)
829: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
830: }
831: }
832:
833: /*
834: * This should be invoked when we're on a macro line and want to process
835: * to the end of the current input line, doing all of our macros along
836: * the way.
837: */
838: static void
839: parseeoln(struct texi *p, const char *buf, size_t sz, size_t *pos)
840: {
841: size_t end;
842: enum texicmd cmd;
843:
844: assert(0 == p->literal);
845:
846: while (*pos < sz && '\n' != buf[*pos]) {
847: while (*pos < sz && isws(buf[*pos])) {
848: p->seenws = 1;
849: advance(p, buf, pos);
850: }
851: switch (buf[*pos]) {
852: case ('}'):
853: if (0 == p->ign)
854: texiwarn(p, "unexpected \"}\"");
855: advance(p, buf, pos);
856: continue;
857: case ('{'):
858: if (0 == p->ign)
859: texiwarn(p, "unexpected \"{\"");
1.1 kristaps 860: advance(p, buf, pos);
861: continue;
862: case ('@'):
863: break;
864: default:
1.8 kristaps 865: texiword(p, buf, sz, pos, '\0');
1.1 kristaps 866: continue;
867: }
868:
869: cmd = texicmd(p, buf, *pos, sz, &end);
870: advanceto(p, buf, pos, end);
871: if (TEXICMD__MAX == cmd)
872: continue;
873: if (NULL != texitoks[cmd].fp)
874: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
875: }
876: }
877:
1.8 kristaps 878: /*
879: * Parse a single word or command.
880: * This will return immediately at the EOF.
881: */
1.1 kristaps 882: static void
1.3 kristaps 883: parsesingle(struct texi *p, const char *buf, size_t sz, size_t *pos)
884: {
885: size_t end;
886: enum texicmd cmd;
887:
888: if ((*pos = advancenext(p, buf, sz, pos)) >= sz)
889: return;
890:
891: switch (buf[*pos]) {
892: case ('}'):
893: if (0 == p->ign)
894: texiwarn(p, "unexpected \"}\"");
895: advance(p, buf, pos);
896: return;
897: case ('{'):
898: if (0 == p->ign)
899: texiwarn(p, "unexpected \"{\"");
900: advance(p, buf, pos);
901: return;
902: case ('@'):
903: break;
904: default:
1.8 kristaps 905: texiword(p, buf, sz, pos, '\0');
1.3 kristaps 906: return;
907: }
908:
909: cmd = texicmd(p, buf, *pos, sz, &end);
910: advanceto(p, buf, pos, end);
911: if (TEXICMD__MAX == cmd)
912: return;
913: if (NULL != texitoks[cmd].fp)
914: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
915: }
916:
/*
 * Parse the whole buffer, one word or command at a time, starting at
 * its first byte.
 */
static void
parseeof(struct texi *p, const char *buf, size_t sz)
{
	size_t	 cur;

	cur = 0;
	while (cur < sz)
		parsesingle(p, buf, sz, &cur);
}
928:
1.8 kristaps 929: /*
930: * Parse a block sequence until we have the "@end endtoken" command
931: * invocation.
932: * This will return immediately at EOF.
933: */
1.7 kristaps 934: static void
1.1 kristaps 935: parseto(struct texi *p, const char *buf,
936: size_t sz, size_t *pos, const char *endtoken)
937: {
938: size_t end;
939: enum texicmd cmd;
940: size_t endtoksz;
941:
942: endtoksz = strlen(endtoken);
943: assert(endtoksz > 0);
944:
945: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
946: switch (buf[*pos]) {
947: case ('}'):
1.3 kristaps 948: if (0 == p->ign)
949: texiwarn(p, "unexpected \"}\"");
1.1 kristaps 950: advance(p, buf, pos);
951: continue;
952: case ('{'):
1.3 kristaps 953: if (0 == p->ign)
954: texiwarn(p, "unexpected \"{\"");
1.1 kristaps 955: advance(p, buf, pos);
956: continue;
957: case ('@'):
958: break;
959: default:
1.8 kristaps 960: texiword(p, buf, sz, pos, '\0');
1.1 kristaps 961: continue;
962: }
963:
964: cmd = texicmd(p, buf, *pos, sz, &end);
965: advanceto(p, buf, pos, end);
966: if (TEXICMD_END == cmd) {
1.2 kristaps 967: while (*pos < sz && isws(buf[*pos]))
1.1 kristaps 968: advance(p, buf, pos);
969: /*
1.8 kristaps 970: * FIXME: check the full word, not just its
971: * initial substring!
1.1 kristaps 972: */
973: if (sz - *pos >= endtoksz && 0 == strncmp
974: (&buf[*pos], endtoken, endtoksz)) {
1.3 kristaps 975: advanceeoln(p, buf, sz, pos, 0);
1.1 kristaps 976: break;
977: }
1.3 kristaps 978: if (0 == p->ign)
979: texiwarn(p, "unexpected \"end\"");
980: advanceeoln(p, buf, sz, pos, 0);
1.1 kristaps 981: continue;
982: } else if (TEXICMD__MAX != cmd)
983: if (NULL != texitoks[cmd].fp)
984: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
985: }
986: }
987:
1.8 kristaps 988: /*
989: * Memory-map the file "fname" and begin parsing it.
990: * This can be called in a nested context.
991: */
1.1 kristaps 992: static void
1.2 kristaps 993: parsefile(struct texi *p, const char *fname)
994: {
995: struct texifile *f;
996: int fd;
997: struct stat st;
998:
999: assert(p->filepos < 64);
1000: f = &p->files[p->filepos];
1001: memset(f, 0, sizeof(struct texifile));
1002:
1003: f->name = fname;
1004: if (-1 == (fd = open(fname, O_RDONLY, 0))) {
1005: texiabort(p, fname);
1006: } else if (-1 == fstat(fd, &st)) {
1007: close(fd);
1008: texiabort(p, fname);
1009: }
1010:
1011: f->mapsz = st.st_size;
1012: f->map = mmap(NULL, f->mapsz,
1013: PROT_READ, MAP_SHARED, fd, 0);
1014: close(fd);
1015:
1016: if (MAP_FAILED == f->map)
1017: texiabort(p, fname);
1018:
1019: p->filepos++;
1020: parseeof(p, f->map, f->mapsz);
1021: texifilepop(p);
1022: }
1023:
1024: static void
1.3 kristaps 1025: dodeftypevar(struct texi *p, enum texicmd cmd,
1026: const char *buf, size_t sz, size_t *pos)
1027: {
1028: const char *blk;
1029:
1030: blk = TEXICMD_DEFTYPEVR == cmd ?
1031: "deftypevr" : "deftypevar";
1032:
1033: if (p->ign) {
1034: parseto(p, buf, sz, pos, blk);
1035: return;
1036: }
1037:
1.11 ! kristaps 1038: texivspace(p);
1.3 kristaps 1039: if (TEXICMD_DEFTYPEVR == cmd) {
1040: parsebracket(p, buf, sz, pos);
1041: texiputchars(p, ":\n");
1042: }
1.5 kristaps 1043: teximacroopen(p, "Vt");
1.4 kristaps 1044: parseeoln(p, buf, sz, pos);
1.3 kristaps 1045: teximacroclose(p);
1.11 ! kristaps 1046: texivspace(p);
1.3 kristaps 1047: parseto(p, buf, sz, pos, blk);
1048: }
1049:
1050: static void
1051: dodeftypefun(struct texi *p, enum texicmd cmd,
1052: const char *buf, size_t sz, size_t *pos)
1053: {
1054: const char *blk;
1055:
1.5 kristaps 1056: blk = NULL;
1.3 kristaps 1057: switch (cmd) {
1058: case (TEXICMD_DEFTYPEFN):
1059: case (TEXICMD_DEFTYPEFUN):
1.5 kristaps 1060: blk = texitoks[cmd].tok;
1.3 kristaps 1061: break;
1.5 kristaps 1062: default:
1.3 kristaps 1063: break;
1064: }
1065:
1066: if (p->ign) {
1067: if (NULL != blk)
1068: parseto(p, buf, sz, pos, blk);
1069: return;
1070: }
1071:
1072: switch (cmd) {
1073: case (TEXICMD_DEFTYPEFN):
1074: case (TEXICMD_DEFTYPEFUN):
1.11 ! kristaps 1075: texivspace(p);
1.3 kristaps 1076: break;
1077: default:
1078: break;
1079: }
1080: if (TEXICMD_DEFTYPEFN == cmd ||
1081: TEXICMD_DEFTYPEFNX == cmd) {
1082: parsebracket(p, buf, sz, pos);
1083: texiputchars(p, ":\n");
1084: }
1.5 kristaps 1085: teximacroopen(p, "Ft");
1.3 kristaps 1086: parsesingle(p, buf, sz, pos);
1087: teximacroclose(p);
1.5 kristaps 1088: teximacroopen(p, "Fn");
1.3 kristaps 1089: parsesingle(p, buf, sz, pos);
1090: teximacroclose(p);
1.5 kristaps 1091: teximacroopen(p, "Li");
1.4 kristaps 1092: parseeoln(p, buf, sz, pos);
1.3 kristaps 1093: teximacroclose(p);
1.11 ! kristaps 1094: texivspace(p);
1.3 kristaps 1095: if (NULL != blk)
1096: parseto(p, buf, sz, pos, blk);
1097: }
1098:
1099: static void
1.1 kristaps 1100: doignblock(struct texi *p, enum texicmd cmd,
1101: const char *buf, size_t sz, size_t *pos)
1102: {
1103:
1.3 kristaps 1104: p->ign++;
1.5 kristaps 1105: parseto(p, buf, sz, pos, texitoks[cmd].tok);
1.3 kristaps 1106: p->ign--;
1.1 kristaps 1107: }
1108:
1109: static void
1.3 kristaps 1110: doblock(struct texi *p, enum texicmd cmd,
1.1 kristaps 1111: const char *buf, size_t sz, size_t *pos)
1112: {
1113:
1.5 kristaps 1114: parseto(p, buf, sz, pos, texitoks[cmd].tok);
1.1 kristaps 1115: }
1116:
1117: static void
1118: doinline(struct texi *p, const char *buf,
1119: size_t sz, size_t *pos, const char *macro)
1120: {
1121:
1.5 kristaps 1122: teximacroopen(p, macro);
1.1 kristaps 1123: p->seenws = 0;
1124: parsebracket(p, buf, sz, pos);
1.11 ! kristaps 1125: texipunctuate(p, buf, sz, pos);
1.5 kristaps 1126: teximacroclose(p);
1.1 kristaps 1127: }
1128:
1129: static void
1.2 kristaps 1130: doinclude(struct texi *p, enum texicmd cmd,
1131: const char *buf, size_t sz, size_t *pos)
1132: {
1133: char fname[PATH_MAX], path[PATH_MAX];
1134: size_t i;
1135: int rc;
1136:
1137: while (*pos < sz && ' ' == buf[*pos])
1138: advance(p, buf, pos);
1139:
1140: /* Read in the filename. */
1141: for (i = 0; *pos < sz && '\n' != buf[*pos]; i++) {
1142: if (i == sizeof(fname) - 1)
1143: break;
1144: fname[i] = buf[*pos];
1145: advance(p, buf, pos);
1146: }
1147:
1148: if (i == 0)
1149: texierr(p, "path too short");
1150: else if ('\n' != buf[*pos])
1151: texierr(p, "path too long");
1152: else if ('/' == fname[0])
1153: texierr(p, "no absolute paths");
1154: fname[i] = '\0';
1155:
1156: if (strstr(fname, "../") || strstr(fname, "/.."))
1157: texierr(p, "insecure path");
1158:
1.5 kristaps 1159: for (i = 0; i < p->dirsz; i++) {
1160: rc = snprintf(path, sizeof(path),
1161: "%s/%s", p->dirs[i], fname);
1162: if (rc < 0)
1163: texierr(p, "couldn't format path");
1164: else if ((size_t)rc >= sizeof(path))
1165: texierr(p, "path too long");
1166: else if (-1 == access(path, R_OK))
1167: continue;
1168:
1169: parsefile(p, path);
1170: return;
1171: }
1.2 kristaps 1172:
1.5 kristaps 1173: texierr(p, "couldn't find %s in includes", fname);
1.2 kristaps 1174: }
1175:
1176: static void
1.1 kristaps 1177: doitalic(struct texi *p, enum texicmd cmd,
1178: const char *buf, size_t sz, size_t *pos)
1179: {
1180:
1181: texiputchars(p, "\\fI");
1182: parsebracket(p, buf, sz, pos);
1183: texiputchars(p, "\\fP");
1184: }
1185:
1186: static void
1.3 kristaps 1187: doenv(struct texi *p, enum texicmd cmd,
1188: const char *buf, size_t sz, size_t *pos)
1189: {
1190:
1191: if (p->literal)
1192: parsebracket(p, buf, sz, pos);
1193: else
1194: doinline(p, buf, sz, pos, "Ev");
1195: }
1196:
1197: static void
1.1 kristaps 1198: doliteral(struct texi *p, enum texicmd cmd,
1199: const char *buf, size_t sz, size_t *pos)
1200: {
1201:
1.3 kristaps 1202: if (p->literal)
1.1 kristaps 1203: parsebracket(p, buf, sz, pos);
1204: else
1205: doinline(p, buf, sz, pos, "Li");
1206: }
1207:
1208: static void
1209: doemph(struct texi *p, enum texicmd cmd,
1210: const char *buf, size_t sz, size_t *pos)
1211: {
1212:
1.3 kristaps 1213: if (p->literal)
1.1 kristaps 1214: doitalic(p, cmd, buf, sz, pos);
1215: else
1216: doinline(p, buf, sz, pos, "Em");
1217: }
1218:
1219: static void
1220: docommand(struct texi *p, enum texicmd cmd,
1221: const char *buf, size_t sz, size_t *pos)
1222: {
1223:
1224: doinline(p, buf, sz, pos, "Xr");
1225: }
1226:
1227: static void
1228: dobracket(struct texi *p, enum texicmd cmd,
1229: const char *buf, size_t sz, size_t *pos)
1230: {
1231:
1232: parsebracket(p, buf, sz, pos);
1233: }
1234:
1235: static void
1236: dofile(struct texi *p, enum texicmd cmd,
1237: const char *buf, size_t sz, size_t *pos)
1238: {
1239:
1.3 kristaps 1240: if (p->literal)
1.1 kristaps 1241: parsebracket(p, buf, sz, pos);
1242: else
1243: doinline(p, buf, sz, pos, "Pa");
1244: }
1245:
1246: static void
1.3 kristaps 1247: dodisplay(struct texi *p, enum texicmd cmd,
1248: const char *buf, size_t sz, size_t *pos)
1249: {
1250:
1.5 kristaps 1251: teximacro(p, "Bd -display -offset indent");
1.11 ! kristaps 1252: p->seenvs = 1;
1.3 kristaps 1253: advanceeoln(p, buf, sz, pos, 1);
1254: parseto(p, buf, sz, pos, "display");
1.5 kristaps 1255: teximacro(p, "Ed");
1.3 kristaps 1256: }
1257:
1258: static void
1.1 kristaps 1259: doexample(struct texi *p, enum texicmd cmd,
1260: const char *buf, size_t sz, size_t *pos)
1261: {
1.3 kristaps 1262: const char *blk;
1263:
1264: blk = TEXICMD_EXAMPLE == cmd ? "example" : "smallexample";
1.1 kristaps 1265:
1.5 kristaps 1266: teximacro(p, "Bd -literal -offset indent");
1.3 kristaps 1267: advanceeoln(p, buf, sz, pos, 1);
1268: p->literal++;
1269: parseto(p, buf, sz, pos, blk);
1270: p->literal--;
1.5 kristaps 1271: teximacro(p, "Ed");
1.1 kristaps 1272: }
1273:
1274: static void
1275: dobye(struct texi *p, enum texicmd cmd,
1276: const char *buf, size_t sz, size_t *pos)
1277: {
1278:
1279: texiexit(p);
1280: exit(EXIT_SUCCESS);
1281: }
1282:
1283: static void
1.10 kristaps 1284: dotitle(struct texi *p, enum texicmd cmd,
1285: const char *buf, size_t sz, size_t *pos)
1286: {
1287: size_t start, end;
1288:
1289: while (*pos < sz && isws(buf[*pos]))
1290: advance(p, buf, pos);
1291: start = end = *pos;
1292: while (end < sz && '\n' != buf[end])
1293: end++;
1294: free(p->subtitle);
1295: p->subtitle = malloc(end - start + 1);
1296: memcpy(p->subtitle, &buf[start], end - start);
1297: p->subtitle[end - start] = '\0';
1298: }
1299:
1300: static void
1.1 kristaps 1301: dosymbol(struct texi *p, enum texicmd cmd,
1302: const char *buf, size_t sz, size_t *pos)
1303: {
1304:
1.3 kristaps 1305: if (p->seenws && p->outcol && 0 == p->literal) {
1306: texiputchar(p, ' ');
1307: p->seenws = 0;
1308: }
1309:
1.1 kristaps 1310: switch (cmd) {
1.3 kristaps 1311: case (TEXICMD_ASTERISK):
1312: case (TEXICMD_NEWLINE):
1313: case (TEXICMD_SPACE):
1314: case (TEXICMD_TAB):
1315: texiputchar(p, ' ');
1316: break;
1.1 kristaps 1317: case (TEXICMD_AT):
1.3 kristaps 1318: texiputchar(p, '@');
1319: break;
1320: case (TEXICMD_BANG):
1321: texiputchar(p, '!');
1.7 kristaps 1322: break;
1323: case (TEXICMD_BULLET):
1324: texiputchars(p, "\\(bu");
1.1 kristaps 1325: break;
1326: case (TEXICMD_COPYRIGHT):
1327: texiputchars(p, "\\(co");
1328: break;
1.2 kristaps 1329: case (TEXICMD_DOTS):
1330: texiputchars(p, "...");
1331: break;
1.1 kristaps 1332: case (TEXICMD_LATEX):
1333: texiputchars(p, "LaTeX");
1334: break;
1.3 kristaps 1335: case (TEXICMD_QUESTIONMARK):
1336: texiputchar(p, '?');
1337: break;
1338: case (TEXICMD_SQUIGGLE_LEFT):
1339: texiputchars(p, "{");
1340: break;
1341: case (TEXICMD_SQUIGGLE_RIGHT):
1342: texiputchars(p, "}");
1343: break;
1.1 kristaps 1344: case (TEXICMD_TEXSYM):
1345: texiputchars(p, "TeX");
1346: break;
1.3 kristaps 1347: case (TEXICMD_COLON):
1348: case (TEXICMD_HYPHEN):
1349: break;
1.1 kristaps 1350: default:
1.5 kristaps 1351: texiwarn(p, "sym: %d", cmd);
1.1 kristaps 1352: abort();
1353: }
1354:
1.5 kristaps 1355: if (texitoks[cmd].len > 1)
1356: doignbracket(p, cmd, buf, sz, pos);
1.1 kristaps 1357: }
1358:
1359: static void
1360: doquotation(struct texi *p, enum texicmd cmd,
1361: const char *buf, size_t sz, size_t *pos)
1362: {
1363:
1.5 kristaps 1364: teximacro(p, "Qo");
1.1 kristaps 1365: parseto(p, buf, sz, pos, "quotation");
1.5 kristaps 1366: teximacro(p, "Qc");
1.1 kristaps 1367: }
1368:
1.3 kristaps 1369: static void
1370: domath(struct texi *p, enum texicmd cmd,
1371: const char *buf, size_t sz, size_t *pos)
1372: {
1373: size_t nest;
1374:
1375: /*
1376: * Math handling is different from everything else.
1377: * We don't allow any subcomponents, and we ignore the rules in
1378: * terms of @-commands.
1379: * This departs from GNU's rules, but whatever.
1380: */
1381: while (*pos < sz && isws(buf[*pos]))
1382: advance(p, buf, pos);
1383: if (*pos == sz || '{' != buf[*pos])
1384: return;
1385: advance(p, buf, pos);
1386: if (p->seenws && p->outcol && 0 == p->literal)
1387: texiputchar(p, ' ');
1388: p->seenws = 0;
1389: for (nest = 1; *pos < sz && nest > 0; ) {
1390: if ('{' == buf[*pos])
1391: nest++;
1392: else if ('}' == buf[*pos])
1393: if (0 == --nest)
1394: continue;
1395: texiputchar(p, buf[*pos]);
1396: advance(p, buf, pos);
1397: }
1398: if (*pos == sz)
1399: return;
1400: assert('}' == buf[*pos]);
1401: advance(p, buf, pos);
1402: }
1403:
1.1 kristaps 1404: static void
1.8 kristaps 1405: dolink(struct texi *p, enum texicmd cmd,
1.1 kristaps 1406: const char *buf, size_t sz, size_t *pos)
1407: {
1.8 kristaps 1408: int c;
1.1 kristaps 1409:
1410: switch (cmd) {
1411: case (TEXICMD_EMAIL):
1.5 kristaps 1412: teximacroopen(p, "Mt");
1.1 kristaps 1413: break;
1.3 kristaps 1414: case (TEXICMD_UREF):
1.1 kristaps 1415: case (TEXICMD_URL):
1.5 kristaps 1416: teximacroopen(p, "Lk");
1.1 kristaps 1417: break;
1.8 kristaps 1418: case (TEXICMD_XREF):
1419: texiputchars(p, "See Section");
1420: teximacroopen(p, "Qq");
1421: break;
1422: case (TEXICMD_PXREF):
1423: texiputchars(p, "see Section");
1424: teximacroopen(p, "Qq");
1425: break;
1.1 kristaps 1426: default:
1.8 kristaps 1427: abort();
1.1 kristaps 1428: }
1.8 kristaps 1429:
1430: c = parsearg(p, buf, sz, pos, 0);
1431: p->ign++;
1432: while (c > 0)
1433: c = parsearg(p, buf, sz, pos, 1);
1434: p->ign--;
1435:
1.11 ! kristaps 1436: texipunctuate(p, buf, sz, pos);
1.8 kristaps 1437: teximacroclose(p);
1438: }
1439:
1440: static void
1441: doignargn(struct texi *p, enum texicmd cmd,
1442: const char *buf, size_t sz, size_t *pos)
1443: {
1444: int c;
1445:
1446: c = parsearg(p, buf, sz, pos, 0);
1447: p->ign++;
1448: while (c > 0)
1449: c = parsearg(p, buf, sz, pos, 1);
1450: p->ign--;
1.1 kristaps 1451: }
1452:
1453: static void
1454: dosubsection(struct texi *p, enum texicmd cmd,
1455: const char *buf, size_t sz, size_t *pos)
1456: {
1457:
1.11 ! kristaps 1458: texivspace(p);
1.5 kristaps 1459: teximacroopen(p, "Em");
1.3 kristaps 1460: parseeoln(p, buf, sz, pos);
1.5 kristaps 1461: teximacroclose(p);
1.11 ! kristaps 1462: texivspace(p);
1.1 kristaps 1463: }
1464:
1465: static void
1466: dosection(struct texi *p, enum texicmd cmd,
1467: const char *buf, size_t sz, size_t *pos)
1468: {
1469:
1.3 kristaps 1470: if (p->outmacro)
1471: texierr(p, "subsection in open line scope!?");
1472: else if (p->literal)
1473: texierr(p, "subsection in a literal scope!?");
1474:
1.5 kristaps 1475: teximacroopen(p, "Ss");
1.3 kristaps 1476: parseeoln(p, buf, sz, pos);
1477: teximacroclose(p);
1.11 ! kristaps 1478: p->seenvs = 1;
1.3 kristaps 1479: }
1480:
1481: static void
1482: dosp(struct texi *p, enum texicmd cmd,
1483: const char *buf, size_t sz, size_t *pos)
1484: {
1485:
1.11 ! kristaps 1486: texivspace(p);
1.3 kristaps 1487: advanceeoln(p, buf, sz, pos, 1);
1.1 kristaps 1488: }
1489:
1490: static void
1.3 kristaps 1491: dochapter(struct texi *p, enum texicmd cmd,
1.1 kristaps 1492: const char *buf, size_t sz, size_t *pos)
1493: {
1494:
1.3 kristaps 1495: if (p->outmacro)
1496: texierr(p, "section in open line scope!?");
1497: else if (p->literal)
1498: texierr(p, "section in a literal scope!?");
1499:
1.5 kristaps 1500: teximacroopen(p, "Sh");
1.3 kristaps 1501: parseeoln(p, buf, sz, pos);
1502: teximacroclose(p);
1.11 ! kristaps 1503: p->seenvs = 1;
1.1 kristaps 1504: }
1505:
1506: static void
1507: dotop(struct texi *p, enum texicmd cmd,
1508: const char *buf, size_t sz, size_t *pos)
1509: {
1.10 kristaps 1510: const char *cp;
1511: time_t t;
1512: char date[32];
1513:
1514: t = time(NULL);
1515: strftime(date, sizeof(date), "%F", localtime(&t));
1.1 kristaps 1516:
1.3 kristaps 1517: p->ign--;
1.10 kristaps 1518: teximacroopen(p, "Dd");
1519: texiputchars(p, date);
1520: teximacroclose(p);
1521: teximacroopen(p, "Dt");
1522: for (cp = p->title; '\0' != *cp; cp++)
1523: texiputchar(p, toupper(*cp));
1.11 ! kristaps 1524: texiputchars(p, " 7");
1.10 kristaps 1525: teximacroclose(p);
1.5 kristaps 1526: teximacro(p, "Os");
1527: teximacro(p, "Sh NAME");
1.10 kristaps 1528: teximacroopen(p, "Nm");
1529: texiputchars(p, p->title);
1530: teximacroclose(p);
1531: teximacroopen(p, "Nd");
1532: texiputchars(p, NULL != p->subtitle ?
1533: p->subtitle : "Unknown description");
1534: teximacroclose(p);
1.11 ! kristaps 1535: p->seenvs = 1;
! 1536: dochapter(p, cmd, buf, sz, pos);
1.1 kristaps 1537: }
1538:
1539: static void
1540: doitem(struct texi *p, enum texicmd cmd,
1541: const char *buf, size_t sz, size_t *pos)
1542: {
1543:
1.3 kristaps 1544: if (p->outmacro)
1545: texierr(p, "item in open line scope!?");
1546: else if (p->literal)
1547: texierr(p, "item in a literal scope!?");
1548:
1549: switch (p->list) {
1550: case (TEXILIST_ITEM):
1.5 kristaps 1551: teximacroopen(p, "It");
1.3 kristaps 1552: break;
1553: case (TEXILIST_NOITEM):
1.5 kristaps 1554: teximacro(p, "It");
1.3 kristaps 1555: break;
1556: default:
1.11 ! kristaps 1557: texivspace(p);
1.3 kristaps 1558: break;
1559: }
1.11 ! kristaps 1560: p->seenvs = 1;
1.3 kristaps 1561:
1562: parseeoln(p, buf, sz, pos);
1.1 kristaps 1563:
1.3 kristaps 1564: if (TEXILIST_ITEM == p->list)
1565: teximacroclose(p);
1.9 kristaps 1566: else if (p->outcol > 0)
1.1 kristaps 1567: texiputchar(p, '\n');
1568: }
1569:
1570: static void
1571: dotable(struct texi *p, enum texicmd cmd,
1572: const char *buf, size_t sz, size_t *pos)
1573: {
1.3 kristaps 1574: enum texilist sv = p->list;
1575:
1576: p->list = TEXILIST_ITEM;
1.5 kristaps 1577: teximacro(p, "Bl -tag -width Ds");
1.11 ! kristaps 1578: p->seenvs = 1;
1.1 kristaps 1579: parseto(p, buf, sz, pos, "table");
1.5 kristaps 1580: teximacro(p, "El");
1.3 kristaps 1581: p->list = sv;
1.1 kristaps 1582: }
1583:
1584: static void
1.2 kristaps 1585: doenumerate(struct texi *p, enum texicmd cmd,
1586: const char *buf, size_t sz, size_t *pos)
1587: {
1.3 kristaps 1588: enum texilist sv = p->list;
1.2 kristaps 1589:
1.3 kristaps 1590: p->list = TEXILIST_NOITEM;
1.5 kristaps 1591: teximacro(p, "Bl -enum");
1.11 ! kristaps 1592: p->seenvs = 1;
1.2 kristaps 1593: parseto(p, buf, sz, pos, "enumerate");
1.5 kristaps 1594: teximacro(p, "El");
1.3 kristaps 1595: p->list = sv;
1.2 kristaps 1596: }
1597:
1598: static void
1.1 kristaps 1599: doitemize(struct texi *p, enum texicmd cmd,
1600: const char *buf, size_t sz, size_t *pos)
1601: {
1.3 kristaps 1602: enum texilist sv = p->list;
1.1 kristaps 1603:
1.3 kristaps 1604: p->list = TEXILIST_ITEM;
1.5 kristaps 1605: teximacro(p, "Bl -bullet");
1.11 ! kristaps 1606: p->seenvs = 1;
1.1 kristaps 1607: parseto(p, buf, sz, pos, "itemize");
1.5 kristaps 1608: teximacro(p, "El");
1.3 kristaps 1609: p->list = sv;
1.1 kristaps 1610: }
1611:
1612: static void
1613: doignbracket(struct texi *p, enum texicmd cmd,
1614: const char *buf, size_t sz, size_t *pos)
1615: {
1616:
1.3 kristaps 1617: p->ign++;
1.1 kristaps 1618: parsebracket(p, buf, sz, pos);
1.3 kristaps 1619: p->ign--;
1.1 kristaps 1620: }
1621:
1622: static void
1623: doignline(struct texi *p, enum texicmd cmd,
1624: const char *buf, size_t sz, size_t *pos)
1625: {
1626:
1.3 kristaps 1627: advanceeoln(p, buf, sz, pos, 1);
1.1 kristaps 1628: }
1629:
/*
 * Build the include-directory array.
 * Element zero is always a copy of "base"; if "cp" is not NULL, it is
 * split on colons and each field (empty ones included) is copied into
 * the following elements.
 * The number of elements is stored in "sz" and the array is returned.
 * Allocation failure terminates the program.
 * This does NOT sanitise the directories!
 */
static char **
parsedirs(const char *base, const char *cp, size_t *sz)
{
	char		*copy, *cursor, *field;
	const char	*colon;
	char		**dirs;
	size_t		 slot;

	/* One slot for "base", plus one per colon-separated field. */
	*sz = 1;
	if (NULL != cp) {
		*sz = 2;
		colon = strchr(cp, ':');
		while (NULL != colon) {
			(*sz)++;
			colon = strchr(colon + 1, ':');
		}
	}

	dirs = calloc(*sz, sizeof(char *));
	if (NULL == dirs) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	dirs[0] = strdup(base);
	if (NULL == dirs[0]) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	if (NULL == cp)
		return(dirs);

	/* strsep() modifies its input, so split a private copy. */
	copy = strdup(cp);
	if (NULL == copy) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	slot = 1;
	cursor = copy;
	while (NULL != (field = strsep(&cursor, ":"))) {
		dirs[slot] = strdup(field);
		if (NULL == dirs[slot]) {
			perror(NULL);
			exit(EXIT_FAILURE);
		}
		slot++;
	}

	free(copy);
	return(dirs);
}
1675:
1.1 kristaps 1676: int
1677: main(int argc, char *argv[])
1678: {
1679: struct texi texi;
1.2 kristaps 1680: int c;
1681: char *path, *dir;
1.10 kristaps 1682: const char *progname, *Idir, *cp;
1.1 kristaps 1683:
1684: progname = strrchr(argv[0], '/');
1685: if (progname == NULL)
1686: progname = argv[0];
1687: else
1688: ++progname;
1689:
1.10 kristaps 1690: memset(&texi, 0, sizeof(struct texi));
1.5 kristaps 1691: Idir = NULL;
1.10 kristaps 1692:
1.5 kristaps 1693: while (-1 != (c = getopt(argc, argv, "I:")))
1.1 kristaps 1694: switch (c) {
1.5 kristaps 1695: case ('I'):
1696: Idir = optarg;
1697: break;
1.1 kristaps 1698: default:
1699: goto usage;
1700: }
1701:
1702: argv += optind;
1703: if (0 == (argc -= optind))
1704: goto usage;
1705:
1.2 kristaps 1706: if (NULL == (path = strdup(argv[0]))) {
1707: perror(NULL);
1708: exit(EXIT_FAILURE);
1709: } else if (NULL == (dir = dirname(path))) {
1710: perror(argv[0]);
1711: free(path);
1712: exit(EXIT_FAILURE);
1713: }
1714: free(path);
1715:
1.10 kristaps 1716: if (NULL != (cp = strrchr(argv[0], '/')))
1717: texi.title = strdup(cp + 1);
1718: else
1719: texi.title = strdup(argv[0]);
1720:
1721: if (NULL == texi.title) {
1722: perror(NULL);
1723: exit(EXIT_FAILURE);
1724: } else if (NULL != (path = strchr(texi.title, '.')))
1725: *path = '\0';
1726:
1.3 kristaps 1727: texi.ign = 1;
1.5 kristaps 1728: texi.dirs = parsedirs(dir, Idir, &texi.dirsz);
1.2 kristaps 1729: parsefile(&texi, argv[0]);
1.5 kristaps 1730: /* We shouldn't get here. */
1.2 kristaps 1731: texiexit(&texi);
1732: return(EXIT_FAILURE);
1.1 kristaps 1733: usage:
1.8 kristaps 1734: fprintf(stderr, "usage: %s [-Idirs] file\n", progname);
1.1 kristaps 1735: return(EXIT_FAILURE);
1736: }
CVSweb