Annotation of texi2mdoc/main.c, Revision 1.21
1.21 ! kristaps 1: /* $Id: main.c,v 1.20 2015/02/19 16:15:17 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2015 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/mman.h>
18: #include <sys/stat.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <getopt.h>
1.2 kristaps 24: #include <libgen.h>
25: #include <limits.h>
1.1 kristaps 26: #include <stdarg.h>
27: #include <stdio.h>
28: #include <stdlib.h>
29: #include <string.h>
1.10 kristaps 30: #include <time.h>
1.6 kristaps 31: #include <unistd.h>
1.1 kristaps 32:
/*
 * This defines each one of the Texinfo commands that we understand.
 * Obviously this only refers to native commands; overridden names are a
 * different story.
 * These values are used as indices into the texitoks[] table, so the
 * order of the enumerators and the order of that table must agree.
 * TEXICMD__MAX is the number of commands; texicmd() also returns it to
 * mean "no such command".
 */
enum	texicmd {
	TEXICMD_ACRONYM,
	TEXICMD_ACUTE,
	TEXICMD_A4PAPER,
	TEXICMD_ANCHOR,
	TEXICMD_APPENDIX,
	TEXICMD_APPENDIXSEC,
	TEXICMD_APPENDIXSUBSEC,
	TEXICMD_ASTERISK,
	TEXICMD_AT,
	TEXICMD_AUTHOR,
	TEXICMD_B,
	TEXICMD_BANG,
	TEXICMD_BULLET,
	TEXICMD_BYE,
	TEXICMD_CENTER,
	TEXICMD_CHAPTER,
	TEXICMD_CINDEX,
	TEXICMD_CIRCUMFLEX,
	TEXICMD_CITE,
	TEXICMD_CODE,
	TEXICMD_COLON,
	TEXICMD_COLUMNFRACTIONS,
	TEXICMD_COMMAND,
	TEXICMD_COMMENT,
	TEXICMD_COMMENT_LONG,
	TEXICMD_CONTENTS,
	TEXICMD_COPYING,
	TEXICMD_COPYRIGHT,
	TEXICMD_DEFFN,
	TEXICMD_DEFFNX,
	TEXICMD_DEFMAC,
	TEXICMD_DEFMACX,
	TEXICMD_DEFTP,
	TEXICMD_DEFTPX,
	TEXICMD_DEFTYPEFN,
	TEXICMD_DEFTYPEFNX,
	TEXICMD_DEFTYPEFUN,
	TEXICMD_DEFTYPEFUNX,
	TEXICMD_DEFTYPEVAR,
	TEXICMD_DEFTYPEVARX,
	TEXICMD_DEFTYPEVR,
	TEXICMD_DEFTYPEVRX,
	TEXICMD_DEFUN,
	TEXICMD_DEFUNX,
	TEXICMD_DEFVAR,
	TEXICMD_DEFVARX,
	TEXICMD_DEFVR,
	TEXICMD_DEFVRX,
	TEXICMD_DETAILMENU,
	TEXICMD_DFN,
	TEXICMD_DIRCATEGORY,
	TEXICMD_DIRENTRY,
	TEXICMD_DISPLAY,
	TEXICMD_DOTS,
	TEXICMD_EMAIL,
	TEXICMD_EMPH,
	TEXICMD_END,
	TEXICMD_ENUMERATE,
	TEXICMD_ENV,
	TEXICMD_ERROR,
	TEXICMD_EXAMPLE,
	TEXICMD_EXPANSION,
	TEXICMD_FILE,
	TEXICMD_FINALOUT,
	TEXICMD_FINDEX,
	TEXICMD_FTABLE,
	TEXICMD_FORMAT,
	TEXICMD_GRAVE,
	TEXICMD_GROUP,
	TEXICMD_HEADING,
	TEXICMD_HEADINGS,
	TEXICMD_HEADITEM,
	TEXICMD_HYPHEN,
	TEXICMD_I,
	TEXICMD_IFCLEAR,
	TEXICMD_IFDOCBOOK,
	TEXICMD_IFHTML,
	TEXICMD_IFINFO,
	TEXICMD_IFNOTDOCBOOK,
	TEXICMD_IFNOTHTML,
	TEXICMD_IFNOTINFO,
	TEXICMD_IFNOTPLAINTEXT,
	TEXICMD_IFNOTTEX,
	TEXICMD_IFNOTXML,
	TEXICMD_IFPLAINTEXT,
	TEXICMD_IFTEX,
	TEXICMD_IFSET,
	TEXICMD_IFXML,
	TEXICMD_IGNORE,
	TEXICMD_IMAGE,
	TEXICMD_INCLUDE,
	TEXICMD_INDENTBLOCK,
	TEXICMD_INSERTCOPYING,
	TEXICMD_ITEM,
	TEXICMD_ITEMIZE,
	TEXICMD_ITEMX,
	TEXICMD_KBD,
	TEXICMD_KEY,
	TEXICMD_KINDEX,
	TEXICMD_LATEX,
	TEXICMD_MATH,
	TEXICMD_MENU,
	TEXICMD_MULTITABLE,
	TEXICMD_NEED,
	TEXICMD_NEWLINE,
	TEXICMD_NODE,
	TEXICMD_NOINDENT,
	TEXICMD_OPTION,
	TEXICMD_PXREF,
	TEXICMD_QUESTIONMARK,
	TEXICMD_QUOTATION,
	TEXICMD_PAGE,
	TEXICMD_PARINDENT,
	TEXICMD_PRINTINDEX,
	TEXICMD_R,
	TEXICMD_REF,
	TEXICMD_RESULT,
	TEXICMD_SAMP,
	TEXICMD_SANSSERIF,
	TEXICMD_SC,
	TEXICMD_SECTION,
	TEXICMD_SET,
	TEXICMD_SETCHAPNEWPAGE,
	TEXICMD_SETFILENAME,
	TEXICMD_SETTITLE,
	TEXICMD_SLANTED,
	TEXICMD_SP,
	TEXICMD_SPACE,
	TEXICMD_SMALLBOOK,
	TEXICMD_SMALLDISPLAY,
	TEXICMD_SMALLEXAMPLE,
	TEXICMD_SMALLFORMAT,
	TEXICMD_SMALLINDENTBLOCK,
	TEXICMD_SQUIGGLE_LEFT,
	TEXICMD_SQUIGGLE_RIGHT,
	TEXICMD_STRONG,
	TEXICMD_SUBHEADING,
	TEXICMD_SUBSECTION,
	TEXICMD_SUBTITLE,
	TEXICMD_SYNCODEINDEX,
	TEXICMD_T,
	TEXICMD_TAB,
	TEXICMD_TABSYM,
	TEXICMD_TABLE,
	TEXICMD_TEX,
	TEXICMD_TEXSYM,
	TEXICMD_TILDE,
	TEXICMD_TITLE,
	TEXICMD_TITLEFONT,
	TEXICMD_TITLEPAGE,
	TEXICMD_TOP,
	TEXICMD_UMLAUT,
	TEXICMD_UNNUMBERED,
	TEXICMD_UNNUMBEREDSEC,
	TEXICMD_UNNUMBEREDSUBSEC,
	TEXICMD_UREF,
	TEXICMD_URL,
	TEXICMD_VAR,
	TEXICMD_VERBATIMINCLUDE,
	TEXICMD_VINDEX,
	TEXICMD_VSKIP,
	TEXICMD_VTABLE,
	TEXICMD_W,
	TEXICMD_XREF,
	TEXICMD__MAX
};
205:
/*
 * The file currently being parsed.
 * This keeps track of our location within that file: "line" and "col"
 * are updated by advance() as input bytes are consumed, and are printed
 * one-based by texiwarn() and texierr().
 * The mapping is released with munmap(2) in texifilepop().
 */
struct	texifile {
	const char	*name; /* name of the file */
	size_t	  	 line; /* current line (from zero) */
	size_t	  	 col; /* current column in line (from zero) */
	char		*map; /* mmap'd file */
	size_t		 mapsz; /* size of mmap */
};
217:
218: struct texi;
219:
/*
 * Callback for functions implementing texi commands.
 * One of these is stored for each entry in texitoks[].
 * NOTE(review): the arguments are presumably the parse state, the
 * command that matched, the input buffer, the buffer size and the
 * current position within the buffer (updated by the callee) -- confirm
 * against the handler implementations.
 */
typedef	void (*texicmdfp)(struct texi *,
	enum texicmd, const char *, size_t, size_t *);
225:
/*
 * Describes Texinfo commands, whether native or overridden.
 * texicmd() matches input against "tok" only when the candidate name is
 * exactly "len" bytes long, so "len" must equal strlen(tok).
 */
struct	texitok {
	texicmdfp	 fp; /* callback (or NULL if none) */
	const char	*tok; /* name of the token */
	size_t		 len; /* strlen(tok) */
};
234:
/*
 * The kind of list currently being processed; stored in the "list"
 * field of struct texi.
 * NOTE(review): the exact meaning of each value is defined by the list
 * handlers, which are not shown here -- confirm before relying on it.
 */
enum	texilist {
	TEXILIST_NONE = 0,
	TEXILIST_ITEM,
	TEXILIST_NOITEM,
	TEXILIST_TABLE
};
241:
/*
 * The main parse structure.
 * This keeps any necessary information handy.
 * The dirs array, its strings, title and subtitle are released by
 * texiexit(); open files are unmapped there as well.
 */
struct	texi {
	struct texifile	  files[64]; /* stack of open files */
	size_t		  filepos; /* number of open files */
	size_t		  outcol; /* column in output line */
	char		**dirs; /* texi directories */
	size_t		  dirsz; /* number of texi directories */
	char		 *title; /* title of document */
	char		 *subtitle; /* subtitle of document */
	/*
	 * The following control what we output to the screen.
	 * The complexity is required to accommodate for mdoc(7).
	 */
	enum texilist	  list; /* current list (set recursively) */
	int		  outmacro; /* if >0, output is in line macro */
	int		  seenws; /* ws has been seen (and ignored) */
	int		  seenvs; /* newline has been Pp'd */
	int		  ign; /* if >0, don't print anything */
	int		  literal; /* if >0, literal context */
};
265:
/*
 * Texi disregards spaces and tabs: isws() is non-zero for either.
 * ismspace() additionally accepts a newline.
 * Both macros evaluate their argument more than once.
 */
#define	isws(_x) \
	((_x) == ' ' || (_x) == '\t')
#define	ismspace(_x) \
	(isws((_x)) || (_x) == '\n')
1.1 kristaps 271:
/*
 * Forward declarations of the command handlers referenced by the
 * texitoks[] table below.
 * All of them share the texicmdfp signature.
 */
static	void doaccent(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doblock(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dobracket(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dobye(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dodefn(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dodisplay(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doenumerate(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doexample(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doignargn(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doignblock(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doignbracket(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doignline(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doinline(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doinclude(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doitem(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doitemize(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dolink(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void domath(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void domultitable(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doquotation(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dotable(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dotop(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dosection(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dosp(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dosubsection(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dosymbol(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dotab(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void dotitle(struct texi *, enum texicmd, const char *, size_t, size_t *);
static	void doverbinclude(struct texi *, enum texicmd, const char *, size_t, size_t *);
1.1 kristaps 301:
302: static const struct texitok texitoks[TEXICMD__MAX] = {
1.20 kristaps 303: /* TEXICMD__BEGIN */
1.8 kristaps 304: { doignargn, "acronym", 7 }, /* TEXICMD_ACRONYM */
1.16 kristaps 305: { doaccent, "'", 1 }, /* TEXICMD_ACUTE */
1.1 kristaps 306: { doignline, "afourpaper", 10 }, /* TEXICMD_A4PAPER */
307: { doignbracket, "anchor", 6 }, /* TEXICMD_ANCHOR */
1.12 kristaps 308: { dosection, "appendix", 8 }, /* TEXICMD_APPENDIX */
309: { dosection, "appendixsec", 11 }, /* TEXICMD_APPENDIXSEC */
1.20 kristaps 310: { dosubsection, "appendixsubsec", 14 }, /* TEXICMD_APPENDIXSUBSEC */
1.3 kristaps 311: { dosymbol, "*", 1 }, /* TEXICMD_ASTERISK */
1.1 kristaps 312: { dosymbol, "@", 1 }, /* TEXICMD_AT */
1.3 kristaps 313: { doignline, "author", 6 }, /* TEXICMD_AUTHOR */
1.21 ! kristaps 314: { doinline, "b", 1 }, /* TEXICMD_BOLD */
1.3 kristaps 315: { dosymbol, "!", 1 }, /* TEXICMD_BANG */
1.7 kristaps 316: { dosymbol, "bullet", 6 }, /* TEXICMD_BULLET */
1.1 kristaps 317: { dobye, "bye", 3 }, /* TEXICMD_BYE */
1.12 kristaps 318: { doignline, "center", 6 }, /* TEXICMD_CENTER */
319: { dosection, "chapter", 7 }, /* TEXICMD_CHAPTER */
1.1 kristaps 320: { doignline, "cindex", 6 }, /* TEXICMD_CINDEX */
1.16 kristaps 321: { doaccent, "^", 1 }, /* TEXICMD_CIRCUMFLEX */
1.21 ! kristaps 322: { doinline, "code", 4 }, /* TEXICMD_CODE */
! 323: { doinline, "cite", 4 }, /* TEXICMD_CITE */
1.3 kristaps 324: { dosymbol, ":", 1 }, /* TEXICMD_COLON */
1.18 kristaps 325: { NULL, "columnfractions", 15 }, /* TEXICMD_COLUMNFRACTIONS */
1.12 kristaps 326: { doinline, "command", 7 }, /* TEXICMD_COMMAND */
1.1 kristaps 327: { doignline, "c", 1 }, /* TEXICMD_COMMENT */
1.2 kristaps 328: { doignline, "comment", 7 }, /* TEXICMD_COMMENT_LONG */
1.1 kristaps 329: { doignline, "contents", 8 }, /* TEXICMD_CONTENTS */
330: { doignblock, "copying", 7 }, /* TEXICMD_COPYING */
331: { dosymbol, "copyright", 9 }, /* TEXICMD_COPYRIGHT */
1.15 kristaps 332: { dodefn, "deffn", 5 }, /* TEXICMD_DEFFN */
333: { dodefn, "deffnx", 6 }, /* TEXICMD_DEFFNX */
334: { dodefn, "defmac", 6 }, /* TEXICMD_DEFMAC */
335: { dodefn, "defmacx", 7 }, /* TEXICMD_DEFMACX */
336: { dodefn, "deftp", 5 }, /* TEXICMD_DEFTP */
337: { dodefn, "deftpx", 6 }, /* TEXICMD_DEFTPX */
338: { dodefn, "deftypefn", 9 }, /* TEXICMD_DEFTYPEFN */
339: { dodefn, "deftypefnx", 10 }, /* TEXICMD_DEFTYPEFNX */
340: { dodefn, "deftypefun", 10 }, /* TEXICMD_DEFTYPEFUN */
341: { dodefn, "deftypefunx", 11 }, /* TEXICMD_DEFTYPEFUNX */
342: { dodefn, "deftypevar", 10 }, /* TEXICMD_DEFTYPEVAR */
343: { dodefn, "deftypevarx", 11 }, /* TEXICMD_DEFTYPEVARX */
344: { dodefn, "deftypevr", 9 }, /* TEXICMD_DEFTYPEVR */
345: { dodefn, "deftypevrx", 10 }, /* TEXICMD_DEFTYPEVRX */
346: { dodefn, "defun", 5 }, /* TEXICMD_DEFUN */
347: { dodefn, "defunx", 6 }, /* TEXICMD_DEFUNX */
348: { dodefn, "defvar", 6 }, /* TEXICMD_DEFVAR */
349: { dodefn, "defvarx", 7 }, /* TEXICMD_DEFVARX */
350: { dodefn, "defvr", 5 }, /* TEXICMD_DEFVR */
351: { dodefn, "defvrx", 6 }, /* TEXICMD_DEFVRX */
1.1 kristaps 352: { doignblock, "detailmenu", 10 }, /* TEXICMD_DETAILMENU */
1.21 ! kristaps 353: { doinline, "dfn", 3 }, /* TEXICMD_DFN */
1.1 kristaps 354: { doignline, "dircategory", 11 }, /* TEXICMD_DIRCATEGORY */
355: { doignblock, "direntry", 8 }, /* TEXICMD_DIRENTRY */
1.3 kristaps 356: { dodisplay, "display", 7 }, /* TEXICMD_DISPLAY */
1.2 kristaps 357: { dosymbol, "dots", 4 }, /* TEXICMD_DOTS */
1.8 kristaps 358: { dolink, "email", 5 }, /* TEXICMD_EMAIL */
1.21 ! kristaps 359: { doinline, "emph", 4 }, /* TEXICMD_EMPH */
1.1 kristaps 360: { NULL, "end", 3 }, /* TEXICMD_END */
1.2 kristaps 361: { doenumerate, "enumerate", 9 }, /* TEXICMD_ENUMERATE */
1.12 kristaps 362: { doinline, "env", 3 }, /* TEXICMD_ENV */
1.15 kristaps 363: { dosymbol, "error", 5 }, /* TEXICMD_ERROR */
1.1 kristaps 364: { doexample, "example", 7 }, /* TEXICMD_EXAMPLE */
1.17 kristaps 365: { dosymbol, "expansion", 9 }, /* TEXICMD_EXPANSION */
1.12 kristaps 366: { doinline, "file", 4 }, /* TEXICMD_FILE */
1.17 kristaps 367: { doignline, "finalout", 8 }, /* TEXICMD_FINALOUT */
1.20 kristaps 368: { doignline, "findex", 6 }, /* TEXICMD_FINDEX */
369: { dotable, "ftable", 6 }, /* TEXICMD_FTABLE */
370: { dodisplay, "format", 6 }, /* TEXICMD_FORMAT */
1.16 kristaps 371: { doaccent, "`", 1 }, /* TEXICMD_GRAVE */
1.3 kristaps 372: { doblock, "group", 5 }, /* TEXICMD_GROUP */
1.2 kristaps 373: { dosection, "heading", 7 }, /* TEXICMD_HEADING */
1.3 kristaps 374: { doignline, "headings", 8 }, /* TEXICMD_HEADINGS */
1.18 kristaps 375: { doitem, "headitem", 8 }, /* TEXICMD_HEADITEM */
1.3 kristaps 376: { dosymbol, "-", 1 }, /* TEXICMD_HYPHEN */
1.21 ! kristaps 377: { doinline, "i", 1 }, /* TEXICMD_I */
1.3 kristaps 378: { doignblock, "ifclear", 7 }, /* TEXICMD_IFCLEAR */
1.14 kristaps 379: { doignblock, "ifdocbook", 9 }, /* TEXICMD_IFDOCBOOK */
1.1 kristaps 380: { doignblock, "ifhtml", 6 }, /* TEXICMD_IFHTML */
1.3 kristaps 381: { doignblock, "ifinfo", 6 }, /* TEXICMD_IFINFO */
1.14 kristaps 382: { doblock, "ifnotdocbook", 12 }, /* TEXICMD_IFNOTDOCBOOK */
383: { doblock, "ifnothtml", 9 }, /* TEXICMD_IFNOTHTML */
384: { doblock, "ifnotinfo", 9 }, /* TEXICMD_IFNOTINFO */
385: { doignblock, "ifnotplaintext", 14 }, /* TEXICMD_IFNOTPLAINTEXT */
1.3 kristaps 386: { doblock, "ifnottex", 8 }, /* TEXICMD_IFNOTTEX */
1.14 kristaps 387: { doblock, "ifnotxml", 8 }, /* TEXICMD_IFNOTXML */
388: { doblock, "ifplaintext", 11 }, /* TEXICMD_IFPLAINTEXT */
1.1 kristaps 389: { doignblock, "iftex", 5 }, /* TEXICMD_IFTEX */
1.3 kristaps 390: { doignblock, "ifset", 5 }, /* TEXICMD_IFSET */
1.14 kristaps 391: { doignblock, "ifxml", 5 }, /* TEXICMD_IFXML */
1.17 kristaps 392: { doignblock, "ignore", 6 }, /* TEXICMD_IGNORE */
1.1 kristaps 393: { doignbracket, "image", 5 }, /* TEXICMD_IMAGE */
1.2 kristaps 394: { doinclude, "include", 7 }, /* TEXICMD_INCLUDE */
1.13 kristaps 395: { dodisplay, "indentblock", 11 }, /* TEXICMD_INDENTBLOCK */
1.5 kristaps 396: { doignline, "insertcopying", 13 }, /* TEXICMD_INSERTCOPYING */
1.1 kristaps 397: { doitem, "item", 4 }, /* TEXICMD_ITEM */
398: { doitemize, "itemize", 7 }, /* TEXICMD_ITEMIZE */
1.20 kristaps 399: { doitem, "itemx", 5 }, /* TEXICMD_ITEMX */
1.21 ! kristaps 400: { doinline, "kbd", 3 }, /* TEXICMD_KBD */
1.18 kristaps 401: { dobracket, "key", 3 }, /* TEXICMD_KEY */
1.20 kristaps 402: { doignline, "kindex", 6 }, /* TEXICMD_KINDEX */
1.1 kristaps 403: { dosymbol, "LaTeX", 5 }, /* TEXICMD_LATEX */
1.3 kristaps 404: { domath, "math", 4 }, /* TEXICMD_MATH */
1.1 kristaps 405: { doignblock, "menu", 4 }, /* TEXICMD_MENU */
1.18 kristaps 406: { domultitable, "multitable", 10 }, /* TEXICMD_MULTITABLE */
1.15 kristaps 407: { doignline, "need", 4 }, /* TEXICMD_NEED */
1.3 kristaps 408: { dosymbol, "\n", 1 }, /* TEXICMD_NEWLINE */
1.1 kristaps 409: { doignline, "node", 4 }, /* TEXICMD_NODE */
1.3 kristaps 410: { doignline, "noindent", 8 }, /* TEXICMD_NOINDENT */
1.16 kristaps 411: { doinline, "option", 6 }, /* TEXICMD_OPTION */
1.8 kristaps 412: { dolink, "pxref", 5 }, /* TEXICMD_PXREF */
1.3 kristaps 413: { dosymbol, "?", 1 }, /* TEXICMD_QUESTIONMARK */
1.1 kristaps 414: { doquotation, "quotation", 9 }, /* TEXICMD_QUOTATION */
1.3 kristaps 415: { doignline, "page", 4 }, /* TEXICMD_PAGE */
416: { doignline, "paragraphindent", 14 }, /* TEXICMD_PARINDENT */
1.2 kristaps 417: { doignline, "printindex", 10 }, /* TEXICMD_PRINTINDEX */
1.21 ! kristaps 418: { doinline, "r", 1 }, /* TEXICMD_R */
1.1 kristaps 419: { dobracket, "ref", 3 }, /* TEXICMD_REF */
1.15 kristaps 420: { dosymbol, "result", 6 }, /* TEXICMD_RESULT */
1.21 ! kristaps 421: { doinline, "samp", 4 }, /* TEXICMD_SAMP */
! 422: { doinline, "sansserif", 9 }, /* TEXICMD_SANSSERIF */
1.7 kristaps 423: { dobracket, "sc", 2 }, /* TEXICMD_SC */
1.1 kristaps 424: { dosection, "section", 7 }, /* TEXICMD_SECTION */
1.3 kristaps 425: { doignline, "set", 3 }, /* TEXICMD_SET */
1.1 kristaps 426: { doignline, "setchapternewpage", 17 }, /* TEXICMD_SETCHAPNEWPAGE */
427: { doignline, "setfilename", 11 }, /* TEXICMD_SETFILENAME */
1.10 kristaps 428: { dotitle, "settitle", 8 }, /* TEXICMD_SETTITLE */
1.21 ! kristaps 429: { doinline, "slanted", 7 }, /* TEXICMD_SLANTED */
1.3 kristaps 430: { dosp, "sp", 2 }, /* TEXICMD_SP */
431: { dosymbol, " ", 1 }, /* TEXICMD_SPACE */
1.17 kristaps 432: { doignline, "smallbook", 9 }, /* TEXICMD_SMALLBOOK */
1.12 kristaps 433: { dodisplay, "smalldisplay", 12 }, /* TEXICMD_SMALLDISPLAY */
1.3 kristaps 434: { doexample, "smallexample", 12 }, /* TEXICMD_SMALLEXAMPLE */
1.20 kristaps 435: { dodisplay, "smallformat", 11 }, /* TEXICMD_SMALLFORMAT */
1.13 kristaps 436: { dodisplay, "smallindentblock", 16 }, /* TEXICMD_SMALLINDENTBLOCK */
1.3 kristaps 437: { dosymbol, "{", 1 }, /* TEXICMD_SQUIGGLE_LEFT */
438: { dosymbol, "}", 1 }, /* TEXICMD_SQUIGGLE_RIGHT */
1.21 ! kristaps 439: { doinline, "strong", 6 }, /* TEXICMD_STRONG */
1.20 kristaps 440: { dosubsection, "subheading", 10 }, /* TEXICMD_SUBHEADING */
1.1 kristaps 441: { dosubsection, "subsection", 10 }, /* TEXICMD_SUBSECTION */
1.3 kristaps 442: { doignline, "subtitle", 8 }, /* TEXICMD_SUBTITLE */
1.20 kristaps 443: { doignline, "syncodeindex", 12 }, /* TEXICMD_SYNCODEINDEX */
1.21 ! kristaps 444: { doinline, "t", 1 }, /* TEXICMD_T */
1.18 kristaps 445: { dotab, "tab", 3 }, /* TEXICMD_TAB */
446: { dosymbol, "\t", 1 }, /* TEXICMD_TABSYM */
1.1 kristaps 447: { dotable, "table", 5 }, /* TEXICMD_TABLE */
448: { doignblock, "tex", 3 }, /* TEXICMD_TEX */
449: { dosymbol, "TeX", 3 }, /* TEXICMD_TEXSYM */
1.16 kristaps 450: { doaccent, "~", 1 }, /* TEXICMD_TILDE */
1.3 kristaps 451: { doignline, "title", 5 }, /* TEXICMD_TITLE */
1.1 kristaps 452: { dobracket, "titlefont", 9 }, /* TEXICMD_TITLEFONT */
453: { doignblock, "titlepage", 9 }, /* TEXICMD_TITLEPAGE */
454: { dotop, "top", 3 }, /* TEXICMD_TOP */
1.16 kristaps 455: { doaccent, "\"", 1 }, /* TEXICMD_UMLAUT */
1.12 kristaps 456: { dosection, "unnumbered", 10 }, /* TEXICMD_UNNUMBERED */
1.2 kristaps 457: { dosection, "unnumberedsec", 13 }, /* TEXICMD_UNNUMBEREDSEC */
1.20 kristaps 458: { dosubsection, "unnumberedsubsec", 16 }, /* TEXICMD_UNNUMBEREDSUBSEC */
1.8 kristaps 459: { dolink, "uref", 4 }, /* TEXICMD_UREF */
460: { dolink, "url", 3 }, /* TEXICMD_URL */
1.12 kristaps 461: { doinline, "var", 3 }, /* TEXICMD_VAR */
1.16 kristaps 462: { doverbinclude, "verbatiminclude", 15 }, /* TEXICMD_VERBATIMINCLUDE */
1.18 kristaps 463: { doignline, "vindex", 6 }, /* TEXICMD_VINDEX */
1.9 kristaps 464: { dosp, "vskip", 5 }, /* TEXICMD_VSKIP */
1.20 kristaps 465: { dotable, "vtable", 6 }, /* TEXICMD_VTABLE */
1.3 kristaps 466: { dobracket, "w", 1 }, /* TEXICMD_W */
1.8 kristaps 467: { dolink, "xref", 4 }, /* TEXICMD_XREF */
1.20 kristaps 468: /* TEXICMD__END */
1.1 kristaps 469: };
470:
1.2 kristaps 471: /*
472: * Unmap the top-most file that we're using.
473: */
1.1 kristaps 474: static void
475: texifilepop(struct texi *p)
476: {
477: struct texifile *f;
478:
479: assert(p->filepos > 0);
480: f = &p->files[--p->filepos];
481: munmap(f->map, f->mapsz);
482: }
483:
1.2 kristaps 484: /*
1.8 kristaps 485: * Unmap all files that we're currently using and free all resources
486: * that we've allocated during the parse.
1.2 kristaps 487: * The utility should exit(...) after this is called.
488: */
1.1 kristaps 489: static void
490: texiexit(struct texi *p)
491: {
1.5 kristaps 492: size_t i;
493:
494: if (p->outcol)
495: putchar('\n');
1.1 kristaps 496:
497: while (p->filepos > 0)
498: texifilepop(p);
1.5 kristaps 499:
500: for (i = 0; i < p->dirsz; i++)
501: free(p->dirs[i]);
1.10 kristaps 502:
1.5 kristaps 503: free(p->dirs);
1.10 kristaps 504: free(p->subtitle);
505: free(p->title);
1.1 kristaps 506: }
507:
/*
 * Fatal error: report it, unmap all files and exit.
 * The "errstring" is passed to perror(3), which is invoked first so
 * that errno is reported before texiexit() makes any further calls.
 * This does not return: the process exits with EXIT_FAILURE.
 */
static void
texiabort(struct texi *p, const char *errstring)
{

	perror(errstring);
	texiexit(p);
	exit(EXIT_FAILURE);
}
520:
521: /*
522: * Print a generic warning message (to stderr) tied to our current
523: * location in the parse sequence.
524: */
525: static void
526: texiwarn(const struct texi *p, const char *fmt, ...)
527: {
528: va_list ap;
529:
1.2 kristaps 530: fprintf(stderr, "%s:%zu:%zu: warning: ",
1.1 kristaps 531: p->files[p->filepos - 1].name,
532: p->files[p->filepos - 1].line + 1,
533: p->files[p->filepos - 1].col + 1);
534: va_start(ap, fmt);
535: vfprintf(stderr, fmt, ap);
536: va_end(ap);
537: fputc('\n', stderr);
538: }
539:
1.8 kristaps 540: /*
541: * Print an error message (to stderr) tied to our current location in
542: * the parse sequence, invoke texiexit(), then die.
543: */
1.2 kristaps 544: static void
545: texierr(struct texi *p, const char *fmt, ...)
546: {
547: va_list ap;
548:
549: fprintf(stderr, "%s:%zu:%zu: error: ",
550: p->files[p->filepos - 1].name,
551: p->files[p->filepos - 1].line + 1,
552: p->files[p->filepos - 1].col + 1);
553: va_start(ap, fmt);
554: vfprintf(stderr, fmt, ap);
555: va_end(ap);
556: fputc('\n', stderr);
557: texiexit(p);
558: exit(EXIT_FAILURE);
559: }
560:
1.1 kristaps 561: /*
1.8 kristaps 562: * Put a single data character to the output if we're not ignoring.
563: * Adjusts our output status.
1.18 kristaps 564: * This shouldn't be called for macros: just for ordinary text.
1.1 kristaps 565: */
566: static void
567: texiputchar(struct texi *p, char c)
568: {
569:
1.3 kristaps 570: if (p->ign)
1.1 kristaps 571: return;
1.18 kristaps 572:
573: if ('.' == c && 0 == p->outcol)
574: fputs("\\&", stdout);
575:
1.1 kristaps 576: putchar(c);
1.11 kristaps 577: p->seenvs = 0;
1.1 kristaps 578: if ('\n' == c) {
579: p->outcol = 0;
580: p->seenws = 0;
581: } else
582: p->outcol++;
583: }
584:
/*
 * Write a nil-terminated string of ordinary text, one character at a
 * time, by way of texiputchar().
 * This is for data only: macros have their own output functions.
 */
static void
texiputchars(struct texi *p, const char *s)
{
	const char	*cp;

	for (cp = s; '\0' != *cp; cp++)
		texiputchar(p, *cp);
}
596:
597: /*
1.8 kristaps 598: * Close an mdoc(7) macro opened with teximacroopen().
599: * If there are no more macros on the line, prints a newline.
1.1 kristaps 600: */
601: static void
1.3 kristaps 602: teximacroclose(struct texi *p)
603: {
604:
1.18 kristaps 605: if (p->ign)
606: return;
607:
608: if (0 == --p->outmacro) {
609: putchar('\n');
610: p->outcol = p->seenws = 0;
611: }
1.3 kristaps 612: }
613:
614: /*
1.8 kristaps 615: * Open a mdoc(7) macro.
616: * This is used for line macros, e.g., Qq [foo bar baz].
617: * It can be invoked for nested macros, e.g., Qq Li foo .
1.3 kristaps 618: */
619: static void
620: teximacroopen(struct texi *p, const char *s)
1.1 kristaps 621: {
1.18 kristaps 622: int rc;
623:
624: if (p->ign)
625: return;
626:
627: if (p->outcol && 0 == p->outmacro) {
628: putchar('\n');
629: p->outcol = 0;
630: }
1.1 kristaps 631:
1.5 kristaps 632: if (0 == p->outmacro)
1.18 kristaps 633: putchar('.');
1.5 kristaps 634: else
1.18 kristaps 635: putchar(' ');
636:
637: if (EOF != (rc = fputs(s, stdout)))
638: p->outcol += rc;
639:
640: putchar(' ');
641: p->outcol++;
1.3 kristaps 642: p->outmacro++;
1.5 kristaps 643: p->seenws = 0;
1.1 kristaps 644: }
645:
646: /*
1.8 kristaps 647: * Put a stadnalone mdoc(7) command with the trailing newline.
1.1 kristaps 648: */
649: static void
650: teximacro(struct texi *p, const char *s)
651: {
652:
1.18 kristaps 653: if (p->ign)
654: return;
655:
1.4 kristaps 656: if (p->outmacro)
657: texierr(p, "\"%s\" in open line scope!?", s);
1.18 kristaps 658: if (p->literal)
1.4 kristaps 659: texierr(p, "\"%s\" in a literal scope!?", s);
660:
1.1 kristaps 661: if (p->outcol)
1.18 kristaps 662: putchar('\n');
1.5 kristaps 663:
1.18 kristaps 664: putchar('.');
665: puts(s);
666: p->outcol = p->seenws = 0;
1.1 kristaps 667: }
668:
1.11 kristaps 669: static void
670: texivspace(struct texi *p)
671: {
672:
673: if (p->seenvs)
674: return;
675: teximacro(p, "Pp");
676: p->seenvs = 1;
677: }
678:
1.1 kristaps 679: /*
680: * Advance by a single byte in the input stream.
681: */
682: static void
683: advance(struct texi *p, const char *buf, size_t *pos)
684: {
685:
686: if ('\n' == buf[*pos]) {
687: p->files[p->filepos - 1].line++;
688: p->files[p->filepos - 1].col = 0;
689: } else
690: p->files[p->filepos - 1].col++;
691:
692: (*pos)++;
693: }
694:
1.11 kristaps 695: static void
696: texipunctuate(struct texi *p, const char *buf, size_t sz, size_t *pos)
697: {
698: size_t start, end;
699:
700: if (1 != p->outmacro)
701: return;
702:
703: for (start = end = *pos; end < sz; end++) {
704: switch (buf[end]) {
705: case (','):
706: case (')'):
707: case ('.'):
708: case ('"'):
709: case (':'):
710: case ('!'):
711: case ('?'):
712: continue;
713: default:
714: break;
715: }
716: break;
717: }
718: if (end == *pos)
719: return;
720: if (end + 1 == sz || ' ' == buf[end] || '\n' == buf[end]) {
721: for ( ; start < end; start++) {
722: texiputchar(p, ' ');
723: texiputchar(p, buf[start]);
724: advance(p, buf, pos);
725: }
726: }
727: }
728:
1.1 kristaps 729: /*
730: * Advance to the next non-whitespace word in the input stream.
731: * If we're in literal mode, then print all of the whitespace as we're
732: * doing so.
733: */
734: static size_t
735: advancenext(struct texi *p, const char *buf, size_t sz, size_t *pos)
736: {
737:
1.3 kristaps 738: if (p->literal) {
1.9 kristaps 739: while (*pos < sz && ismspace(buf[*pos])) {
1.5 kristaps 740: if (*pos && '\n' == buf[*pos] &&
741: '\\' == buf[*pos - 1])
742: texiputchar(p, 'e');
1.1 kristaps 743: texiputchar(p, buf[*pos]);
744: advance(p, buf, pos);
745: }
746: return(*pos);
747: }
748:
1.9 kristaps 749: while (*pos < sz && ismspace(buf[*pos])) {
1.1 kristaps 750: p->seenws = 1;
751: /*
752: * If it looks like we've printed a double-line, then
753: * output a paragraph.
754: * FIXME: this is stupid.
755: */
1.11 kristaps 756: if (*pos && '\n' == buf[*pos] && '\n' == buf[*pos - 1])
757: texivspace(p);
1.1 kristaps 758: advance(p, buf, pos);
759: }
760: return(*pos);
761: }
762:
/*
 * Advance to the end of the current line in the input stream and
 * return the new position.
 * If "consumenl" is non-zero, the newline itself is skipped as well.
 */
static size_t
advanceeoln(struct texi *p, const char *buf,
	size_t sz, size_t *pos, int consumenl)
{

	/* FIXME: disregards @NEWLINE. */
	for ( ; *pos < sz; advance(p, buf, pos))
		if ('\n' == buf[*pos])
			break;

	if (consumenl && *pos < sz)
		advance(p, buf, pos);

	return(*pos);
}
778:
/*
 * Advance byte by byte until the position equals "end", an absolute
 * position in the current buffer that must not lie before the current
 * position.
 */
static void
advanceto(struct texi *p, const char *buf, size_t *pos, size_t end)
{

	assert(*pos <= end);
	for (;;) {
		if (*pos >= end)
			break;
		advance(p, buf, pos);
	}
}
791:
792: /*
793: * Output a free-form word in the input stream, progressing to the next
794: * command or white-space.
795: * This also will advance the input stream.
796: */
797: static void
1.8 kristaps 798: texiword(struct texi *p, const char *buf,
799: size_t sz, size_t *pos, char extra)
1.1 kristaps 800: {
801:
1.18 kristaps 802: if (p->seenws && 0 == p->outmacro && p->outcol > 72 && 0 == p->literal)
1.1 kristaps 803: texiputchar(p, '\n');
1.8 kristaps 804: /* FIXME: abstract this: we use it elsewhere. */
1.3 kristaps 805: if (p->seenws && p->outcol && 0 == p->literal)
1.1 kristaps 806: texiputchar(p, ' ');
807:
808: p->seenws = 0;
809:
1.9 kristaps 810: while (*pos < sz && ! ismspace(buf[*pos])) {
1.1 kristaps 811: switch (buf[*pos]) {
812: case ('@'):
813: case ('}'):
814: case ('{'):
815: return;
816: }
1.8 kristaps 817: if ('\0' != extra && buf[*pos] == extra)
818: return;
1.1 kristaps 819: if (*pos < sz - 1 &&
820: '`' == buf[*pos] &&
821: '`' == buf[*pos + 1]) {
822: texiputchars(p, "\\(lq");
823: advance(p, buf, pos);
824: } else if (*pos < sz - 1 &&
825: '\'' == buf[*pos] &&
826: '\'' == buf[*pos + 1]) {
827: texiputchars(p, "\\(rq");
828: advance(p, buf, pos);
829: } else
830: texiputchar(p, buf[*pos]);
831: advance(p, buf, pos);
832: }
833: }
834:
1.8 kristaps 835: /*
836: * Look up the command at position "pos" in the buffer, returning it (or
837: * TEXICMD__MAX if none found) and setting "end" to be the absolute
838: * index after the command name.
839: */
1.1 kristaps 840: static enum texicmd
841: texicmd(struct texi *p, const char *buf,
842: size_t pos, size_t sz, size_t *end)
843: {
844: size_t i, len;
845:
846: assert('@' == buf[pos]);
1.3 kristaps 847:
1.9 kristaps 848: if ((*end = pos) == sz)
849: return(TEXICMD__MAX);
850: else if ((*end = ++pos) == sz)
1.3 kristaps 851: return(TEXICMD__MAX);
852:
853: /* Alphabetic commands are special. */
854: if ( ! isalpha(buf[pos])) {
1.9 kristaps 855: if ((*end = pos + 1) == sz)
856: return(TEXICMD__MAX);
1.3 kristaps 857: for (i = 0; i < TEXICMD__MAX; i++) {
858: if (1 != texitoks[i].len)
859: continue;
860: if (0 == strncmp(texitoks[i].tok, &buf[pos], 1))
861: return(i);
862: }
863: texiwarn(p, "bad command: @%c", buf[pos]);
864: return(TEXICMD__MAX);
865: }
866:
1.9 kristaps 867: for (*end = pos; *end < sz && ! ismspace(buf[*end]); (*end)++)
1.3 kristaps 868: if ((*end > pos && ('@' == buf[*end] ||
869: '{' == buf[*end] || '}' == buf[*end])))
1.1 kristaps 870: break;
871:
872: len = *end - pos;
873: for (i = 0; i < TEXICMD__MAX; i++) {
874: if (len != texitoks[i].len)
875: continue;
876: if (0 == strncmp(texitoks[i].tok, &buf[pos], len))
877: return(i);
878: }
879:
1.3 kristaps 880: texiwarn(p, "bad command: @%.*s", (int)len, &buf[pos]);
1.1 kristaps 881: return(TEXICMD__MAX);
882: }
883:
1.8 kristaps 884: /*
885: * Parse an argument from a bracketed command, e.g., @url{foo, baz}.
886: * Num should be set to the argument we're currently parsing, although
887: * it suffixes for it to be zero or non-zero.
888: * This will return 1 if there are more arguments, 0 otherwise.
889: * This will stop (returning 0) in the event of EOF or if we're not at a
890: * bracket for the zeroth parse.
891: */
892: static int
893: parsearg(struct texi *p, const char *buf,
894: size_t sz, size_t *pos, size_t num)
895: {
896: size_t end;
897: enum texicmd cmd;
898:
1.9 kristaps 899: while (*pos < sz && ismspace(buf[*pos]))
1.8 kristaps 900: advance(p, buf, pos);
901: if (*pos == sz || (0 == num && '{' != buf[*pos]))
902: return(0);
903: if (0 == num)
904: advance(p, buf, pos);
905:
906: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
907: switch (buf[*pos]) {
908: case (','):
909: advance(p, buf, pos);
910: return(1);
911: case ('}'):
912: advance(p, buf, pos);
913: return(0);
914: case ('{'):
915: if (0 == p->ign)
916: texiwarn(p, "unexpected \"{\"");
917: advance(p, buf, pos);
918: continue;
919: case ('@'):
920: break;
921: default:
922: texiword(p, buf, sz, pos, ',');
923: continue;
924: }
925:
926: cmd = texicmd(p, buf, *pos, sz, &end);
927: advanceto(p, buf, pos, end);
928: if (TEXICMD__MAX == cmd)
929: continue;
930: if (NULL != texitoks[cmd].fp)
931: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
932: }
933: return(0);
934: }
935:
936: /*
937: * Parse until the end of a bracketed statement, e.g., @foo{bar baz}.
938: * This will stop in the event of EOF or if we're not at a bracket.
939: */
1.1 kristaps 940: static void
941: parsebracket(struct texi *p, const char *buf, size_t sz, size_t *pos)
942: {
943: size_t end;
944: enum texicmd cmd;
945:
1.9 kristaps 946: while (*pos < sz && ismspace(buf[*pos]))
1.3 kristaps 947: advance(p, buf, pos);
948:
1.1 kristaps 949: if (*pos == sz || '{' != buf[*pos])
950: return;
951: advance(p, buf, pos);
952:
953: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
954: switch (buf[*pos]) {
955: case ('}'):
956: advance(p, buf, pos);
957: return;
958: case ('{'):
1.3 kristaps 959: if (0 == p->ign)
960: texiwarn(p, "unexpected \"{\"");
961: advance(p, buf, pos);
962: continue;
963: case ('@'):
964: break;
965: default:
1.8 kristaps 966: texiword(p, buf, sz, pos, '\0');
1.3 kristaps 967: continue;
968: }
969:
970: cmd = texicmd(p, buf, *pos, sz, &end);
971: advanceto(p, buf, pos, end);
972: if (TEXICMD__MAX == cmd)
973: continue;
974: if (NULL != texitoks[cmd].fp)
975: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
976: }
977: }
978:
979: /*
980: * This should be invoked when we're on a macro line and want to process
981: * to the end of the current input line, doing all of our macros along
982: * the way.
983: */
984: static void
985: parseeoln(struct texi *p, const char *buf, size_t sz, size_t *pos)
986: {
987: size_t end;
988: enum texicmd cmd;
989:
990: while (*pos < sz && '\n' != buf[*pos]) {
991: while (*pos < sz && isws(buf[*pos])) {
992: p->seenws = 1;
1.19 kristaps 993: if (p->literal)
994: texiputchar(p, buf[*pos]);
1.3 kristaps 995: advance(p, buf, pos);
996: }
997: switch (buf[*pos]) {
998: case ('}'):
999: if (0 == p->ign)
1000: texiwarn(p, "unexpected \"}\"");
1001: advance(p, buf, pos);
1002: continue;
1003: case ('{'):
1004: if (0 == p->ign)
1005: texiwarn(p, "unexpected \"{\"");
1.1 kristaps 1006: advance(p, buf, pos);
1007: continue;
1008: case ('@'):
1009: break;
1010: default:
1.8 kristaps 1011: texiword(p, buf, sz, pos, '\0');
1.1 kristaps 1012: continue;
1013: }
1014:
1015: cmd = texicmd(p, buf, *pos, sz, &end);
1016: advanceto(p, buf, pos, end);
1017: if (TEXICMD__MAX == cmd)
1018: continue;
1019: if (NULL != texitoks[cmd].fp)
1020: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
1021: }
1022: }
1023:
1.8 kristaps 1024: /*
1025: * Parse a single word or command.
1026: * This will return immediately at the EOF.
1027: */
1.1 kristaps 1028: static void
1.3 kristaps 1029: parsesingle(struct texi *p, const char *buf, size_t sz, size_t *pos)
1030: {
1031: size_t end;
1032: enum texicmd cmd;
1033:
1034: if ((*pos = advancenext(p, buf, sz, pos)) >= sz)
1035: return;
1036:
1037: switch (buf[*pos]) {
1038: case ('}'):
1039: if (0 == p->ign)
1040: texiwarn(p, "unexpected \"}\"");
1041: advance(p, buf, pos);
1042: return;
1043: case ('{'):
1044: if (0 == p->ign)
1045: texiwarn(p, "unexpected \"{\"");
1046: advance(p, buf, pos);
1047: return;
1048: case ('@'):
1049: break;
1050: default:
1.8 kristaps 1051: texiword(p, buf, sz, pos, '\0');
1.3 kristaps 1052: return;
1053: }
1054:
1055: cmd = texicmd(p, buf, *pos, sz, &end);
1056: advanceto(p, buf, pos, end);
1057: if (TEXICMD__MAX == cmd)
1058: return;
1059: if (NULL != texitoks[cmd].fp)
1060: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
1061: }
1062:
1.15 kristaps 1063: static int
1064: parselinearg(struct texi *p, const char *buf, size_t sz, size_t *pos)
1065: {
1066:
1067: while (*pos < sz && isws(buf[*pos])) {
1068: p->seenws = 1;
1069: advance(p, buf, pos);
1070: }
1071:
1072: if (*pos < sz && '{' == buf[*pos])
1073: parsebracket(p, buf, sz, pos);
1074: else if ('\n' != buf[*pos])
1075: parsesingle(p, buf, sz, pos);
1076: else
1077: return(0);
1078:
1079: return(1);
1080: }
1081:
/*
 * Parse the whole buffer, one word or command at a time, starting
 * from its first byte.
 */
static void
parseeof(struct texi *p, const char *buf, size_t sz)
{
	size_t	 offs = 0;

	while (offs < sz)
		parsesingle(p, buf, sz, &offs);
}
1093:
1.8 kristaps 1094: /*
1095: * Parse a block sequence until we have the "@end endtoken" command
1096: * invocation.
1097: * This will return immediately at EOF.
1098: */
1.7 kristaps 1099: static void
1.1 kristaps 1100: parseto(struct texi *p, const char *buf,
1101: size_t sz, size_t *pos, const char *endtoken)
1102: {
1103: size_t end;
1104: enum texicmd cmd;
1105: size_t endtoksz;
1106:
1107: endtoksz = strlen(endtoken);
1108: assert(endtoksz > 0);
1109:
1110: while ((*pos = advancenext(p, buf, sz, pos)) < sz) {
1111: switch (buf[*pos]) {
1112: case ('}'):
1.3 kristaps 1113: if (0 == p->ign)
1114: texiwarn(p, "unexpected \"}\"");
1.1 kristaps 1115: advance(p, buf, pos);
1116: continue;
1117: case ('{'):
1.3 kristaps 1118: if (0 == p->ign)
1119: texiwarn(p, "unexpected \"{\"");
1.1 kristaps 1120: advance(p, buf, pos);
1121: continue;
1122: case ('@'):
1123: break;
1124: default:
1.8 kristaps 1125: texiword(p, buf, sz, pos, '\0');
1.1 kristaps 1126: continue;
1127: }
1128:
1129: cmd = texicmd(p, buf, *pos, sz, &end);
1130: advanceto(p, buf, pos, end);
1131: if (TEXICMD_END == cmd) {
1.2 kristaps 1132: while (*pos < sz && isws(buf[*pos]))
1.1 kristaps 1133: advance(p, buf, pos);
1134: /*
1.8 kristaps 1135: * FIXME: check the full word, not just its
1136: * initial substring!
1.1 kristaps 1137: */
1138: if (sz - *pos >= endtoksz && 0 == strncmp
1139: (&buf[*pos], endtoken, endtoksz)) {
1.3 kristaps 1140: advanceeoln(p, buf, sz, pos, 0);
1.1 kristaps 1141: break;
1142: }
1.3 kristaps 1143: if (0 == p->ign)
1144: texiwarn(p, "unexpected \"end\"");
1145: advanceeoln(p, buf, sz, pos, 0);
1.1 kristaps 1146: continue;
1147: } else if (TEXICMD__MAX != cmd)
1148: if (NULL != texitoks[cmd].fp)
1149: (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
1150: }
1151: }
1152:
1.8 kristaps 1153: /*
1154: * Memory-map the file "fname" and begin parsing it.
1155: * This can be called in a nested context.
1156: */
1.1 kristaps 1157: static void
1.16 kristaps 1158: parsefile(struct texi *p, const char *fname, int parse)
1.2 kristaps 1159: {
1.16 kristaps 1160: struct texifile *f;
1161: int fd;
1162: struct stat st;
1163: size_t i;
1.2 kristaps 1164:
1165: assert(p->filepos < 64);
1166: f = &p->files[p->filepos];
1167: memset(f, 0, sizeof(struct texifile));
1168:
1169: f->name = fname;
1170: if (-1 == (fd = open(fname, O_RDONLY, 0))) {
1171: texiabort(p, fname);
1172: } else if (-1 == fstat(fd, &st)) {
1173: close(fd);
1174: texiabort(p, fname);
1175: }
1176:
1177: f->mapsz = st.st_size;
1178: f->map = mmap(NULL, f->mapsz,
1179: PROT_READ, MAP_SHARED, fd, 0);
1180: close(fd);
1181:
1182: if (MAP_FAILED == f->map)
1183: texiabort(p, fname);
1184:
1185: p->filepos++;
1.16 kristaps 1186: if ( ! parse) {
1187: /*
1188: * We're printing verbatim output.
1189: * Make sure it doesn't get interpreted as mdoc by
1190: * escaping escapes and making sure leading dots don't
1191: * trigger mdoc(7) expansion.
1192: */
1193: for (i = 0; i < f->mapsz; i++) {
1194: if (i > 0 && '.' == f->map[i])
1195: if ('\n' == f->map[i - 1])
1196: fputs("\\&", stdout);
1197: putchar(f->map[i]);
1198: if ('\\' == f->map[i])
1199: putchar('e');
1200: }
1201: } else
1202: parseeof(p, f->map, f->mapsz);
1.2 kristaps 1203: texifilepop(p);
1204: }
1205:
1206: static void
1.15 kristaps 1207: dodefn(struct texi *p, enum texicmd cmd,
1.3 kristaps 1208: const char *buf, size_t sz, size_t *pos)
1209: {
1210: const char *blk;
1211:
1.5 kristaps 1212: blk = NULL;
1.3 kristaps 1213: switch (cmd) {
1.15 kristaps 1214: case (TEXICMD_DEFFN):
1215: case (TEXICMD_DEFTP):
1.3 kristaps 1216: case (TEXICMD_DEFTYPEFN):
1217: case (TEXICMD_DEFTYPEFUN):
1.15 kristaps 1218: case (TEXICMD_DEFTYPEVAR):
1219: case (TEXICMD_DEFTYPEVR):
1220: case (TEXICMD_DEFUN):
1221: case (TEXICMD_DEFVAR):
1222: case (TEXICMD_DEFVR):
1.5 kristaps 1223: blk = texitoks[cmd].tok;
1.3 kristaps 1224: break;
1.5 kristaps 1225: default:
1.3 kristaps 1226: break;
1227: }
1228:
1229: if (p->ign) {
1.15 kristaps 1230: NULL != blk ?
1231: parseto(p, buf, sz, pos, blk) :
1232: parseeoln(p, buf, sz, pos);
1.3 kristaps 1233: return;
1234: }
1235:
1.15 kristaps 1236: if (NULL != blk)
1237: texivspace(p);
1238:
1.3 kristaps 1239: switch (cmd) {
1.15 kristaps 1240: case (TEXICMD_DEFMAC):
1241: case (TEXICMD_DEFMACX):
1242: texiputchars(p, "Macro");
1243: break;
1244: case (TEXICMD_DEFTYPEVAR):
1245: case (TEXICMD_DEFTYPEVARX):
1246: case (TEXICMD_DEFVAR):
1247: case (TEXICMD_DEFVARX):
1248: texiputchars(p, "Variable");
1249: break;
1.3 kristaps 1250: case (TEXICMD_DEFTYPEFUN):
1.15 kristaps 1251: case (TEXICMD_DEFTYPEFUNX):
1252: case (TEXICMD_DEFUN):
1253: case (TEXICMD_DEFUNX):
1254: texiputchars(p, "Function");
1.3 kristaps 1255: break;
1256: default:
1.15 kristaps 1257: parselinearg(p, buf, sz, pos);
1.3 kristaps 1258: break;
1259: }
1.15 kristaps 1260:
1261: texiputchars(p, ":\n");
1262:
1263: switch (cmd) {
1264: case (TEXICMD_DEFMAC):
1265: case (TEXICMD_DEFMACX):
1266: teximacroopen(p, "Dv");
1267: while (parselinearg(p, buf, sz, pos))
1268: /* Spin. */ ;
1269: teximacroclose(p);
1270: break;
1271: case (TEXICMD_DEFFN):
1272: case (TEXICMD_DEFFNX):
1273: case (TEXICMD_DEFUN):
1274: case (TEXICMD_DEFUNX):
1275: teximacroopen(p, "Fo");
1276: parselinearg(p, buf, sz, pos);
1277: teximacroclose(p);
1278: teximacroopen(p, "Fa");
1279: while (parselinearg(p, buf, sz, pos))
1280: /* Spin. */ ;
1281: teximacroclose(p);
1282: teximacro(p, "Fc");
1283: break;
1284: case (TEXICMD_DEFTYPEFUN):
1285: case (TEXICMD_DEFTYPEFUNX):
1286: case (TEXICMD_DEFTYPEFN):
1287: case (TEXICMD_DEFTYPEFNX):
1288: teximacroopen(p, "Ft");
1289: parselinearg(p, buf, sz, pos);
1290: teximacroclose(p);
1291: teximacroopen(p, "Fo");
1292: parselinearg(p, buf, sz, pos);
1293: teximacroclose(p);
1294: teximacroopen(p, "Fa");
1295: while (parselinearg(p, buf, sz, pos))
1296: /* Spin. */ ;
1297: teximacroclose(p);
1298: teximacro(p, "Fc");
1299: break;
1300: case (TEXICMD_DEFTP):
1301: case (TEXICMD_DEFTPX):
1302: case (TEXICMD_DEFTYPEVAR):
1303: case (TEXICMD_DEFTYPEVARX):
1304: case (TEXICMD_DEFTYPEVR):
1305: case (TEXICMD_DEFTYPEVRX):
1306: teximacroopen(p, "Vt");
1307: while (parselinearg(p, buf, sz, pos))
1308: /* Spin. */ ;
1309: teximacroclose(p);
1310: break;
1311: case (TEXICMD_DEFVAR):
1312: case (TEXICMD_DEFVARX):
1313: case (TEXICMD_DEFVR):
1314: case (TEXICMD_DEFVRX):
1315: teximacroopen(p, "Va");
1316: while (parselinearg(p, buf, sz, pos))
1317: /* Spin. */ ;
1318: teximacroclose(p);
1319: break;
1320: default:
1321: abort();
1.3 kristaps 1322: }
1.15 kristaps 1323:
1.11 kristaps 1324: texivspace(p);
1.3 kristaps 1325: if (NULL != blk)
1326: parseto(p, buf, sz, pos, blk);
1327: }
1328:
1329: static void
1.1 kristaps 1330: doignblock(struct texi *p, enum texicmd cmd,
1331: const char *buf, size_t sz, size_t *pos)
1332: {
1333:
1.3 kristaps 1334: p->ign++;
1.5 kristaps 1335: parseto(p, buf, sz, pos, texitoks[cmd].tok);
1.3 kristaps 1336: p->ign--;
1.1 kristaps 1337: }
1338:
1339: static void
1.3 kristaps 1340: doblock(struct texi *p, enum texicmd cmd,
1.1 kristaps 1341: const char *buf, size_t sz, size_t *pos)
1342: {
1343:
1.5 kristaps 1344: parseto(p, buf, sz, pos, texitoks[cmd].tok);
1.1 kristaps 1345: }
1346:
1347: static void
1.12 kristaps 1348: doinline(struct texi *p, enum texicmd cmd,
1349: const char *buf, size_t sz, size_t *pos)
1.1 kristaps 1350: {
1.21 ! kristaps 1351: const char *macro = NULL;
1.12 kristaps 1352:
1353: switch (cmd) {
1.21 ! kristaps 1354: case (TEXICMD_CODE):
! 1355: case (TEXICMD_KBD):
! 1356: case (TEXICMD_SAMP):
! 1357: case (TEXICMD_T):
! 1358: macro = "Li";
! 1359: break;
! 1360: case (TEXICMD_CITE):
! 1361: case (TEXICMD_DFN):
! 1362: case (TEXICMD_EMPH):
! 1363: case (TEXICMD_I):
! 1364: case (TEXICMD_SLANTED):
! 1365: macro = "Em";
! 1366: break;
! 1367: case (TEXICMD_B):
! 1368: case (TEXICMD_STRONG):
! 1369: macro = "Sy";
! 1370: break;
1.12 kristaps 1371: case (TEXICMD_COMMAND):
1372: macro = "Xr";
1373: break;
1374: case (TEXICMD_ENV):
1375: macro = "Ev";
1376: break;
1377: case (TEXICMD_FILE):
1378: macro = "Pa";
1379: break;
1.16 kristaps 1380: case (TEXICMD_OPTION):
1381: macro = "Op";
1382: break;
1.12 kristaps 1383: case (TEXICMD_VAR):
1384: macro = "Va";
1385: break;
1386: default:
1387: abort();
1388: }
1389:
1.21 ! kristaps 1390: if (NULL == macro || p->literal) {
1.12 kristaps 1391: parsebracket(p, buf, sz, pos);
1392: return;
1393: }
1.1 kristaps 1394:
1.5 kristaps 1395: teximacroopen(p, macro);
1.1 kristaps 1396: p->seenws = 0;
1397: parsebracket(p, buf, sz, pos);
1.11 kristaps 1398: texipunctuate(p, buf, sz, pos);
1.5 kristaps 1399: teximacroclose(p);
1.1 kristaps 1400: }
1401:
1402: static void
1.16 kristaps 1403: doverbinclude(struct texi *p, enum texicmd cmd,
1404: const char *buf, size_t sz, size_t *pos)
1405: {
1406: char fname[PATH_MAX], path[PATH_MAX];
1407: int rc;
1408: size_t i;
1409:
1410: while (*pos < sz && ' ' == buf[*pos])
1411: advance(p, buf, pos);
1412:
1413: /* Read in the filename. */
1414: for (i = 0; *pos < sz && '\n' != buf[*pos]; i++) {
1415: if (i == sizeof(fname) - 1)
1416: break;
1417: fname[i] = buf[*pos];
1418: advance(p, buf, pos);
1419: }
1420:
1421: if (i == 0)
1422: texierr(p, "path too short");
1423: else if ('\n' != buf[*pos])
1424: texierr(p, "path too long");
1425: else if ('/' == fname[0])
1426: texierr(p, "no absolute paths");
1427: fname[i] = '\0';
1428:
1429: if (strstr(fname, "../") || strstr(fname, "/.."))
1430: texierr(p, "insecure path");
1431:
1432: rc = snprintf(path, sizeof(path),
1433: "%s/%s", p->dirs[0], fname);
1434: if (rc < 0)
1435: texierr(p, "couldn't format path");
1436: else if ((size_t)rc >= sizeof(path))
1437: texierr(p, "path too long");
1438:
1439: parsefile(p, path, 0);
1440: }
1441:
1442: static void
1.2 kristaps 1443: doinclude(struct texi *p, enum texicmd cmd,
1444: const char *buf, size_t sz, size_t *pos)
1445: {
1446: char fname[PATH_MAX], path[PATH_MAX];
1447: size_t i;
1448: int rc;
1449:
1450: while (*pos < sz && ' ' == buf[*pos])
1451: advance(p, buf, pos);
1452:
1453: /* Read in the filename. */
1454: for (i = 0; *pos < sz && '\n' != buf[*pos]; i++) {
1455: if (i == sizeof(fname) - 1)
1456: break;
1457: fname[i] = buf[*pos];
1458: advance(p, buf, pos);
1459: }
1460:
1461: if (i == 0)
1462: texierr(p, "path too short");
1463: else if ('\n' != buf[*pos])
1464: texierr(p, "path too long");
1465: else if ('/' == fname[0])
1466: texierr(p, "no absolute paths");
1467: fname[i] = '\0';
1468:
1469: if (strstr(fname, "../") || strstr(fname, "/.."))
1470: texierr(p, "insecure path");
1471:
1.5 kristaps 1472: for (i = 0; i < p->dirsz; i++) {
1473: rc = snprintf(path, sizeof(path),
1474: "%s/%s", p->dirs[i], fname);
1475: if (rc < 0)
1476: texierr(p, "couldn't format path");
1477: else if ((size_t)rc >= sizeof(path))
1478: texierr(p, "path too long");
1479: else if (-1 == access(path, R_OK))
1480: continue;
1481:
1.16 kristaps 1482: parsefile(p, path, 1);
1.5 kristaps 1483: return;
1484: }
1.2 kristaps 1485:
1.5 kristaps 1486: texierr(p, "couldn't find %s in includes", fname);
1.2 kristaps 1487: }
1488:
1489: static void
1.1 kristaps 1490: dobracket(struct texi *p, enum texicmd cmd,
1491: const char *buf, size_t sz, size_t *pos)
1492: {
1493:
1494: parsebracket(p, buf, sz, pos);
1495: }
1496:
1497: static void
1.3 kristaps 1498: dodisplay(struct texi *p, enum texicmd cmd,
1499: const char *buf, size_t sz, size_t *pos)
1500: {
1501:
1.20 kristaps 1502: switch (cmd) {
1503: case (TEXICMD_FORMAT):
1504: case (TEXICMD_SMALLFORMAT):
1505: teximacro(p, "Bd -filled");
1506: break;
1507: default:
1508: teximacro(p, "Bd -filled -offset indent");
1509: break;
1510: }
1511:
1.11 kristaps 1512: p->seenvs = 1;
1.12 kristaps 1513: /* FIXME: ignore and parseeoln. */
1.3 kristaps 1514: advanceeoln(p, buf, sz, pos, 1);
1.13 kristaps 1515: parseto(p, buf, sz, pos, texitoks[cmd].tok);
1.5 kristaps 1516: teximacro(p, "Ed");
1.3 kristaps 1517: }
1518:
1519: static void
1.1 kristaps 1520: doexample(struct texi *p, enum texicmd cmd,
1521: const char *buf, size_t sz, size_t *pos)
1522: {
1523:
1.5 kristaps 1524: teximacro(p, "Bd -literal -offset indent");
1.12 kristaps 1525: /* FIXME: ignore and parseeoln. */
1.3 kristaps 1526: advanceeoln(p, buf, sz, pos, 1);
1527: p->literal++;
1.13 kristaps 1528: parseto(p, buf, sz, pos, texitoks[cmd].tok);
1.3 kristaps 1529: p->literal--;
1.5 kristaps 1530: teximacro(p, "Ed");
1.1 kristaps 1531: }
1532:
1533: static void
1534: dobye(struct texi *p, enum texicmd cmd,
1535: const char *buf, size_t sz, size_t *pos)
1536: {
1537:
1538: texiexit(p);
1539: exit(EXIT_SUCCESS);
1540: }
1541:
1542: static void
1.10 kristaps 1543: dotitle(struct texi *p, enum texicmd cmd,
1544: const char *buf, size_t sz, size_t *pos)
1545: {
1546: size_t start, end;
1547:
1548: while (*pos < sz && isws(buf[*pos]))
1549: advance(p, buf, pos);
1550: start = end = *pos;
1551: while (end < sz && '\n' != buf[end])
1552: end++;
1553: free(p->subtitle);
1554: p->subtitle = malloc(end - start + 1);
1555: memcpy(p->subtitle, &buf[start], end - start);
1556: p->subtitle[end - start] = '\0';
1557: }
1558:
1559: static void
1.16 kristaps 1560: doaccent(struct texi *p, enum texicmd cmd,
1561: const char *buf, size_t sz, size_t *pos)
1562: {
1563:
1564: if (*pos == sz)
1565: return;
1566: advance(p, buf, pos);
1567: switch (cmd) {
1568: case (TEXICMD_ACUTE):
1569: switch (buf[*pos]) {
1570: case ('a'): case ('A'):
1571: case ('e'): case ('E'):
1572: case ('i'): case ('I'):
1573: case ('o'): case ('O'):
1574: case ('u'): case ('U'):
1575: texiputchars(p, "\\(\'");
1576: texiputchar(p, buf[*pos]);
1577: break;
1578: default:
1579: texiputchar(p, buf[*pos]);
1580: }
1581: break;
1582: case (TEXICMD_CIRCUMFLEX):
1583: switch (buf[*pos]) {
1584: case ('a'): case ('A'):
1585: case ('e'): case ('E'):
1586: case ('i'): case ('I'):
1587: case ('o'): case ('O'):
1588: case ('u'): case ('U'):
1589: texiputchars(p, "\\(^");
1590: texiputchar(p, buf[*pos]);
1591: break;
1592: default:
1593: texiputchar(p, buf[*pos]);
1594: }
1595: break;
1596: case (TEXICMD_GRAVE):
1597: switch (buf[*pos]) {
1598: case ('a'): case ('A'):
1599: case ('e'): case ('E'):
1600: case ('i'): case ('I'):
1601: case ('o'): case ('O'):
1602: case ('u'): case ('U'):
1603: texiputchars(p, "\\(`");
1604: texiputchar(p, buf[*pos]);
1605: break;
1606: default:
1607: texiputchar(p, buf[*pos]);
1608: }
1609: break;
1610: case (TEXICMD_TILDE):
1611: switch (buf[*pos]) {
1612: case ('a'): case ('A'):
1613: case ('n'): case ('N'):
1614: case ('o'): case ('O'):
1615: texiputchars(p, "\\(~");
1616: texiputchar(p, buf[*pos]);
1617: break;
1618: default:
1619: texiputchar(p, buf[*pos]);
1620: }
1621: break;
1622: case (TEXICMD_UMLAUT):
1623: switch (buf[*pos]) {
1624: case ('a'): case ('A'):
1625: case ('e'): case ('E'):
1626: case ('i'): case ('I'):
1627: case ('o'): case ('O'):
1628: case ('u'): case ('U'):
1629: case ('y'):
1630: texiputchars(p, "\\(:");
1631: texiputchar(p, buf[*pos]);
1632: break;
1633: default:
1634: texiputchar(p, buf[*pos]);
1635: }
1636: break;
1637: default:
1638: abort();
1639: }
1640: }
1641:
1642: static void
1.1 kristaps 1643: dosymbol(struct texi *p, enum texicmd cmd,
1644: const char *buf, size_t sz, size_t *pos)
1645: {
1646:
1.3 kristaps 1647: if (p->seenws && p->outcol && 0 == p->literal) {
1648: texiputchar(p, ' ');
1649: p->seenws = 0;
1650: }
1651:
1.1 kristaps 1652: switch (cmd) {
1.3 kristaps 1653: case (TEXICMD_ASTERISK):
1654: case (TEXICMD_NEWLINE):
1655: case (TEXICMD_SPACE):
1.18 kristaps 1656: case (TEXICMD_TABSYM):
1.3 kristaps 1657: texiputchar(p, ' ');
1658: break;
1.1 kristaps 1659: case (TEXICMD_AT):
1.3 kristaps 1660: texiputchar(p, '@');
1661: break;
1662: case (TEXICMD_BANG):
1663: texiputchar(p, '!');
1.7 kristaps 1664: break;
1665: case (TEXICMD_BULLET):
1666: texiputchars(p, "\\(bu");
1.1 kristaps 1667: break;
1668: case (TEXICMD_COPYRIGHT):
1669: texiputchars(p, "\\(co");
1670: break;
1.2 kristaps 1671: case (TEXICMD_DOTS):
1672: texiputchars(p, "...");
1673: break;
1.15 kristaps 1674: case (TEXICMD_ERROR):
1675: texiputchars(p, "error\\(->");
1.17 kristaps 1676: break;
1677: case (TEXICMD_EXPANSION):
1678: texiputchars(p, "\\(->");
1.15 kristaps 1679: break;
1.1 kristaps 1680: case (TEXICMD_LATEX):
1681: texiputchars(p, "LaTeX");
1682: break;
1.3 kristaps 1683: case (TEXICMD_QUESTIONMARK):
1684: texiputchar(p, '?');
1.15 kristaps 1685: break;
1686: case (TEXICMD_RESULT):
1687: texiputchars(p, "\\(rA");
1.3 kristaps 1688: break;
1689: case (TEXICMD_SQUIGGLE_LEFT):
1690: texiputchars(p, "{");
1691: break;
1692: case (TEXICMD_SQUIGGLE_RIGHT):
1693: texiputchars(p, "}");
1694: break;
1.1 kristaps 1695: case (TEXICMD_TEXSYM):
1696: texiputchars(p, "TeX");
1697: break;
1.3 kristaps 1698: case (TEXICMD_COLON):
1699: case (TEXICMD_HYPHEN):
1700: break;
1.1 kristaps 1701: default:
1.5 kristaps 1702: texiwarn(p, "sym: %d", cmd);
1.1 kristaps 1703: abort();
1704: }
1705:
1.5 kristaps 1706: if (texitoks[cmd].len > 1)
1707: doignbracket(p, cmd, buf, sz, pos);
1.1 kristaps 1708: }
1709:
1710: static void
1711: doquotation(struct texi *p, enum texicmd cmd,
1712: const char *buf, size_t sz, size_t *pos)
1713: {
1714:
1.5 kristaps 1715: teximacro(p, "Qo");
1.1 kristaps 1716: parseto(p, buf, sz, pos, "quotation");
1.5 kristaps 1717: teximacro(p, "Qc");
1.1 kristaps 1718: }
1719:
1.3 kristaps 1720: static void
1721: domath(struct texi *p, enum texicmd cmd,
1722: const char *buf, size_t sz, size_t *pos)
1723: {
1724: size_t nest;
1725:
1726: /*
1727: * Math handling is different from everything else.
1728: * We don't allow any subcomponents, and we ignore the rules in
1729: * terms of @-commands.
1730: * This departs from GNU's rules, but whatever.
1731: */
1732: while (*pos < sz && isws(buf[*pos]))
1733: advance(p, buf, pos);
1734: if (*pos == sz || '{' != buf[*pos])
1735: return;
1736: advance(p, buf, pos);
1737: if (p->seenws && p->outcol && 0 == p->literal)
1738: texiputchar(p, ' ');
1739: p->seenws = 0;
1740: for (nest = 1; *pos < sz && nest > 0; ) {
1741: if ('{' == buf[*pos])
1742: nest++;
1743: else if ('}' == buf[*pos])
1744: if (0 == --nest)
1745: continue;
1746: texiputchar(p, buf[*pos]);
1747: advance(p, buf, pos);
1748: }
1749: if (*pos == sz)
1750: return;
1751: assert('}' == buf[*pos]);
1752: advance(p, buf, pos);
1753: }
1754:
1.1 kristaps 1755: static void
1.8 kristaps 1756: dolink(struct texi *p, enum texicmd cmd,
1.1 kristaps 1757: const char *buf, size_t sz, size_t *pos)
1758: {
1.8 kristaps 1759: int c;
1.1 kristaps 1760:
1761: switch (cmd) {
1762: case (TEXICMD_EMAIL):
1.5 kristaps 1763: teximacroopen(p, "Mt");
1.1 kristaps 1764: break;
1.3 kristaps 1765: case (TEXICMD_UREF):
1.1 kristaps 1766: case (TEXICMD_URL):
1.5 kristaps 1767: teximacroopen(p, "Lk");
1.1 kristaps 1768: break;
1.8 kristaps 1769: case (TEXICMD_XREF):
1770: texiputchars(p, "See Section");
1771: teximacroopen(p, "Qq");
1772: break;
1773: case (TEXICMD_PXREF):
1774: texiputchars(p, "see Section");
1775: teximacroopen(p, "Qq");
1776: break;
1.1 kristaps 1777: default:
1.8 kristaps 1778: abort();
1.1 kristaps 1779: }
1.8 kristaps 1780:
1781: c = parsearg(p, buf, sz, pos, 0);
1782: p->ign++;
1783: while (c > 0)
1784: c = parsearg(p, buf, sz, pos, 1);
1785: p->ign--;
1786:
1.11 kristaps 1787: texipunctuate(p, buf, sz, pos);
1.8 kristaps 1788: teximacroclose(p);
1789: }
1790:
1791: static void
1792: doignargn(struct texi *p, enum texicmd cmd,
1793: const char *buf, size_t sz, size_t *pos)
1794: {
1795: int c;
1796:
1797: c = parsearg(p, buf, sz, pos, 0);
1798: p->ign++;
1799: while (c > 0)
1800: c = parsearg(p, buf, sz, pos, 1);
1801: p->ign--;
1.1 kristaps 1802: }
1803:
1804: static void
1805: dosubsection(struct texi *p, enum texicmd cmd,
1806: const char *buf, size_t sz, size_t *pos)
1807: {
1.13 kristaps 1808:
1809: if (p->outmacro)
1810: texierr(p, "\"Em\" in open line scope!?");
1811: else if (p->literal)
1812: texierr(p, "\"Em\" in a literal scope!?");
1.1 kristaps 1813:
1.21 ! kristaps 1814: /* We don't have a subsubsection, so make one up. */
1.11 kristaps 1815: texivspace(p);
1.5 kristaps 1816: teximacroopen(p, "Em");
1.3 kristaps 1817: parseeoln(p, buf, sz, pos);
1.5 kristaps 1818: teximacroclose(p);
1.11 kristaps 1819: texivspace(p);
1.1 kristaps 1820: }
1821:
1822: static void
1823: dosection(struct texi *p, enum texicmd cmd,
1824: const char *buf, size_t sz, size_t *pos)
1825: {
1.12 kristaps 1826: const char *blk;
1827:
1828: switch (cmd) {
1829: case (TEXICMD_APPENDIX):
1830: case (TEXICMD_CHAPTER):
1831: case (TEXICMD_TOP):
1832: case (TEXICMD_UNNUMBERED):
1833: blk = "Sh";
1834: break;
1835: case (TEXICMD_APPENDIXSEC):
1836: case (TEXICMD_HEADING):
1837: case (TEXICMD_SECTION):
1838: case (TEXICMD_UNNUMBEREDSEC):
1839: blk = "Ss";
1840: break;
1841: default:
1842: abort();
1843: }
1.1 kristaps 1844:
1.3 kristaps 1845: if (p->outmacro)
1.12 kristaps 1846: texierr(p, "%s in open line scope!?", blk);
1.3 kristaps 1847: else if (p->literal)
1.12 kristaps 1848: texierr(p, "%s in a literal scope!?", blk);
1.3 kristaps 1849:
1.12 kristaps 1850: teximacroopen(p, blk);
1.3 kristaps 1851: parseeoln(p, buf, sz, pos);
1852: teximacroclose(p);
1.11 kristaps 1853: p->seenvs = 1;
1.3 kristaps 1854: }
1855:
1856: static void
1857: dosp(struct texi *p, enum texicmd cmd,
1858: const char *buf, size_t sz, size_t *pos)
1859: {
1860:
1.11 kristaps 1861: texivspace(p);
1.12 kristaps 1862: /* FIXME: ignore and parseeoln. */
1.3 kristaps 1863: advanceeoln(p, buf, sz, pos, 1);
1.1 kristaps 1864: }
1865:
1866: static void
1867: dotop(struct texi *p, enum texicmd cmd,
1868: const char *buf, size_t sz, size_t *pos)
1869: {
1.10 kristaps 1870: const char *cp;
1871: time_t t;
1872: char date[32];
1873:
1.18 kristaps 1874: /*
1875: * Here we print our standard mdoc(7) prologue.
1876: * We use the title set with @settitle for the `Nd' description
1877: * and the source document filename (the first one as invoked on
1878: * the command line) for the title.
1879: * The date is set to the current date.
1880: */
1.10 kristaps 1881: t = time(NULL);
1882: strftime(date, sizeof(date), "%F", localtime(&t));
1.1 kristaps 1883:
1.3 kristaps 1884: p->ign--;
1.10 kristaps 1885: teximacroopen(p, "Dd");
1886: texiputchars(p, date);
1887: teximacroclose(p);
1888: teximacroopen(p, "Dt");
1889: for (cp = p->title; '\0' != *cp; cp++)
1890: texiputchar(p, toupper(*cp));
1.11 kristaps 1891: texiputchars(p, " 7");
1.10 kristaps 1892: teximacroclose(p);
1.5 kristaps 1893: teximacro(p, "Os");
1894: teximacro(p, "Sh NAME");
1.10 kristaps 1895: teximacroopen(p, "Nm");
1896: texiputchars(p, p->title);
1897: teximacroclose(p);
1898: teximacroopen(p, "Nd");
1899: texiputchars(p, NULL != p->subtitle ?
1900: p->subtitle : "Unknown description");
1901: teximacroclose(p);
1.11 kristaps 1902: p->seenvs = 1;
1.12 kristaps 1903: dosection(p, cmd, buf, sz, pos);
1.1 kristaps 1904: }
1905:
1906: static void
1907: doitem(struct texi *p, enum texicmd cmd,
1908: const char *buf, size_t sz, size_t *pos)
1909: {
1910:
1.18 kristaps 1911: /* Multitable is using raw tbl(7). */
1912: if (TEXILIST_TABLE == p->list) {
1913: texiputchar(p, '\n');
1914: return;
1915: }
1916:
1.3 kristaps 1917: if (p->outmacro)
1918: texierr(p, "item in open line scope!?");
1919: else if (p->literal)
1920: texierr(p, "item in a literal scope!?");
1921:
1922: switch (p->list) {
1923: case (TEXILIST_ITEM):
1.5 kristaps 1924: teximacroopen(p, "It");
1.3 kristaps 1925: break;
1926: case (TEXILIST_NOITEM):
1.5 kristaps 1927: teximacro(p, "It");
1.3 kristaps 1928: break;
1929: default:
1.11 kristaps 1930: texivspace(p);
1.3 kristaps 1931: break;
1932: }
1.18 kristaps 1933:
1934: /* Trick so we don't start with Pp. */
1.11 kristaps 1935: p->seenvs = 1;
1.3 kristaps 1936: parseeoln(p, buf, sz, pos);
1.1 kristaps 1937:
1.3 kristaps 1938: if (TEXILIST_ITEM == p->list)
1939: teximacroclose(p);
1.9 kristaps 1940: else if (p->outcol > 0)
1.1 kristaps 1941: texiputchar(p, '\n');
1.18 kristaps 1942: }
1943:
1944: static void
1945: dotab(struct texi *p, enum texicmd cmd,
1946: const char *buf, size_t sz, size_t *pos)
1947: {
1948:
1949: /* This command is only useful in @multitable. */
1950: if (TEXILIST_TABLE == p->list)
1951: texiputchar(p, '\t');
1952: }
1953:
1954: static void
1955: domultitable(struct texi *p, enum texicmd cmd,
1956: const char *buf, size_t sz, size_t *pos)
1957: {
1958: enum texilist sv = p->list;
1959: enum texicmd type;
1960: size_t i, end, columns;
1961:
1962: p->list = TEXILIST_TABLE;
1963: teximacro(p, "TS");
1964: columns = 0;
1965:
1966: /* Advance to the first argument... */
1967: while (*pos < sz && isws(buf[*pos]))
1968: advance(p, buf, pos);
1969:
1970: /* Make sure we don't print anything when scanning. */
1971: p->ign++;
1972: if ('@' == buf[*pos]) {
1973: /*
1974: * Look for @columnfractions.
1975: * We ignore these, but we do use the number of
1976: * arguments to set the number of columns that we'll
1977: * have.
1978: */
1979: type = texicmd(p, buf, *pos, sz, &end);
1980: advanceto(p, buf, pos, end);
1981: if (TEXICMD_COLUMNFRACTIONS != type)
1982: texierr(p, "unknown multitable type");
1983: while (*pos < sz && '\n' != buf[*pos]) {
1984: while (*pos < sz && isws(buf[*pos]))
1985: advance(p, buf, pos);
1986: while (*pos < sz && ! isws(buf[*pos])) {
1987: if ('\n' == buf[*pos])
1988: break;
1989: advance(p, buf, pos);
1990: }
1991: columns++;
1992: }
1993: } else
1994: /*
1995: * We have arguments.
1996: * We could parse these, but it's easier to just let
1997: * tbl(7) figure it out.
1998: * So use this only to count arguments.
1999: */
2000: while (parselinearg(p, buf, sz, pos) > 0)
2001: columns++;
2002: p->ign--;
2003:
2004: /* Left-justify each table entry. */
2005: for (i = 0; i < columns; i++) {
2006: if (i > 0)
2007: texiputchar(p, ' ');
2008: texiputchar(p, 'l');
2009: }
2010: texiputchars(p, ".\n");
2011: p->outmacro++;
2012: parseto(p, buf, sz, pos, texitoks[cmd].tok);
2013: p->outmacro--;
2014: teximacro(p, "TE");
2015: p->list = sv;
1.1 kristaps 2016: }
2017:
2018: static void
2019: dotable(struct texi *p, enum texicmd cmd,
2020: const char *buf, size_t sz, size_t *pos)
2021: {
1.3 kristaps 2022: enum texilist sv = p->list;
2023:
2024: p->list = TEXILIST_ITEM;
1.5 kristaps 2025: teximacro(p, "Bl -tag -width Ds");
1.12 kristaps 2026: /* FIXME: ignore and parseeoln. */
2027: advanceeoln(p, buf, sz, pos, 1);
1.11 kristaps 2028: p->seenvs = 1;
1.20 kristaps 2029: parseto(p, buf, sz, pos, texitoks[cmd].tok);
1.5 kristaps 2030: teximacro(p, "El");
1.3 kristaps 2031: p->list = sv;
1.1 kristaps 2032: }
2033:
2034: static void
1.2 kristaps 2035: doenumerate(struct texi *p, enum texicmd cmd,
2036: const char *buf, size_t sz, size_t *pos)
2037: {
1.3 kristaps 2038: enum texilist sv = p->list;
1.2 kristaps 2039:
1.3 kristaps 2040: p->list = TEXILIST_NOITEM;
1.5 kristaps 2041: teximacro(p, "Bl -enum");
1.11 kristaps 2042: p->seenvs = 1;
1.12 kristaps 2043: /* FIXME: ignore and parseeoln. */
2044: advanceeoln(p, buf, sz, pos, 1);
1.2 kristaps 2045: parseto(p, buf, sz, pos, "enumerate");
1.5 kristaps 2046: teximacro(p, "El");
1.3 kristaps 2047: p->list = sv;
1.2 kristaps 2048: }
2049:
2050: static void
1.1 kristaps 2051: doitemize(struct texi *p, enum texicmd cmd,
2052: const char *buf, size_t sz, size_t *pos)
2053: {
1.3 kristaps 2054: enum texilist sv = p->list;
1.1 kristaps 2055:
1.21 ! kristaps 2056: p->list = TEXILIST_NOITEM;
1.5 kristaps 2057: teximacro(p, "Bl -bullet");
1.11 kristaps 2058: p->seenvs = 1;
1.12 kristaps 2059: /* FIXME: ignore and parseeoln. */
2060: advanceeoln(p, buf, sz, pos, 1);
1.1 kristaps 2061: parseto(p, buf, sz, pos, "itemize");
1.5 kristaps 2062: teximacro(p, "El");
1.3 kristaps 2063: p->list = sv;
1.1 kristaps 2064: }
2065:
2066: static void
2067: doignbracket(struct texi *p, enum texicmd cmd,
2068: const char *buf, size_t sz, size_t *pos)
2069: {
2070:
1.3 kristaps 2071: p->ign++;
1.1 kristaps 2072: parsebracket(p, buf, sz, pos);
1.3 kristaps 2073: p->ign--;
1.1 kristaps 2074: }
2075:
2076: static void
2077: doignline(struct texi *p, enum texicmd cmd,
2078: const char *buf, size_t sz, size_t *pos)
2079: {
2080:
1.12 kristaps 2081: /* FIXME: ignore and parseeoln. */
1.3 kristaps 2082: advanceeoln(p, buf, sz, pos, 1);
1.1 kristaps 2083: }
2084:
/*
 * Build the array of include directories.
 * Slot zero is always a copy of "base".
 * If "cp" is not NULL, it is split on colons and a copy of each field
 * (empty fields included) is appended in order.
 * The number of slots is stored in "sz" and the freshly allocated
 * array is returned; every entry is its own strdup(3) allocation.
 * Exits the program with a perror(3) diagnostic if memory runs out.
 * This does NOT sanitise the directories!
 */
static char **
parsedirs(const char *base, const char *cp, size_t *sz)
{
	char		**dirs;
	char		 *copy, *rest, *field;
	const char	 *scan;
	size_t		  n;

	/* One slot for base, plus one per colon-separated field. */
	*sz = 1;
	if (cp != NULL) {
		(*sz)++;
		scan = strchr(cp, ':');
		while (scan != NULL) {
			(*sz)++;
			scan = strchr(scan + 1, ':');
		}
	}

	dirs = calloc(*sz, sizeof(char *));
	if (dirs == NULL) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	dirs[0] = strdup(base);
	if (dirs[0] == NULL) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	if (cp == NULL)
		return(dirs);

	/* strsep(3) writes into its input, so split a private copy. */
	copy = strdup(cp);
	if (copy == NULL) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	rest = copy;
	n = 1;
	while ((field = strsep(&rest, ":")) != NULL) {
		dirs[n] = strdup(field);
		if (dirs[n] == NULL) {
			perror(NULL);
			exit(EXIT_FAILURE);
		}
		n++;
	}

	free(copy);
	return(dirs);
}
2130:
1.1 kristaps 2131: int
2132: main(int argc, char *argv[])
2133: {
2134: struct texi texi;
1.2 kristaps 2135: int c;
2136: char *path, *dir;
1.10 kristaps 2137: const char *progname, *Idir, *cp;
1.1 kristaps 2138:
2139: progname = strrchr(argv[0], '/');
2140: if (progname == NULL)
2141: progname = argv[0];
2142: else
2143: ++progname;
2144:
1.10 kristaps 2145: memset(&texi, 0, sizeof(struct texi));
1.5 kristaps 2146: Idir = NULL;
1.10 kristaps 2147:
1.5 kristaps 2148: while (-1 != (c = getopt(argc, argv, "I:")))
1.1 kristaps 2149: switch (c) {
1.5 kristaps 2150: case ('I'):
2151: Idir = optarg;
2152: break;
1.1 kristaps 2153: default:
2154: goto usage;
2155: }
2156:
2157: argv += optind;
2158: if (0 == (argc -= optind))
2159: goto usage;
2160:
1.2 kristaps 2161: if (NULL == (path = strdup(argv[0]))) {
2162: perror(NULL);
2163: exit(EXIT_FAILURE);
2164: } else if (NULL == (dir = dirname(path))) {
2165: perror(argv[0]);
2166: free(path);
2167: exit(EXIT_FAILURE);
2168: }
2169: free(path);
2170:
1.10 kristaps 2171: if (NULL != (cp = strrchr(argv[0], '/')))
2172: texi.title = strdup(cp + 1);
2173: else
2174: texi.title = strdup(argv[0]);
2175:
2176: if (NULL == texi.title) {
2177: perror(NULL);
2178: exit(EXIT_FAILURE);
2179: } else if (NULL != (path = strchr(texi.title, '.')))
2180: *path = '\0';
2181:
1.3 kristaps 2182: texi.ign = 1;
1.5 kristaps 2183: texi.dirs = parsedirs(dir, Idir, &texi.dirsz);
1.16 kristaps 2184: parsefile(&texi, argv[0], 1);
1.5 kristaps 2185: /* We shouldn't get here. */
1.2 kristaps 2186: texiexit(&texi);
2187: return(EXIT_FAILURE);
1.1 kristaps 2188: usage:
1.8 kristaps 2189: fprintf(stderr, "usage: %s [-Idirs] file\n", progname);
1.1 kristaps 2190: return(EXIT_FAILURE);
2191: }
CVSweb