=================================================================== RCS file: /cvs/texi2mdoc/main.c,v retrieving revision 1.15 retrieving revision 1.16 diff -u -p -r1.15 -r1.16 --- texi2mdoc/main.c 2015/02/19 09:28:42 1.15 +++ texi2mdoc/main.c 2015/02/19 10:20:31 1.16 @@ -1,4 +1,4 @@ -/* $Id: main.c,v 1.15 2015/02/19 09:28:42 kristaps Exp $ */ +/* $Id: main.c,v 1.16 2015/02/19 10:20:31 kristaps Exp $ */ /* * Copyright (c) 2015 Kristaps Dzonsons * @@ -37,6 +37,7 @@ */ enum texicmd { TEXICMD_ACRONYM, + TEXICMD_ACUTE, TEXICMD_A4PAPER, TEXICMD_ANCHOR, TEXICMD_APPENDIX, @@ -51,6 +52,7 @@ enum texicmd { TEXICMD_CENTER, TEXICMD_CHAPTER, TEXICMD_CINDEX, + TEXICMD_CIRCUMFLEX, TEXICMD_CITE, TEXICMD_CODE, TEXICMD_COLON, @@ -94,6 +96,7 @@ enum texicmd { TEXICMD_ERROR, TEXICMD_EXAMPLE, TEXICMD_FILE, + TEXICMD_GRAVE, TEXICMD_GROUP, TEXICMD_HEADING, TEXICMD_HEADINGS, @@ -127,6 +130,7 @@ enum texicmd { TEXICMD_NEWLINE, TEXICMD_NODE, TEXICMD_NOINDENT, + TEXICMD_OPTION, TEXICMD_PXREF, TEXICMD_QUESTIONMARK, TEXICMD_QUOTATION, @@ -160,15 +164,18 @@ enum texicmd { TEXICMD_TABLE, TEXICMD_TEX, TEXICMD_TEXSYM, + TEXICMD_TILDE, TEXICMD_TITLE, TEXICMD_TITLEFONT, TEXICMD_TITLEPAGE, TEXICMD_TOP, + TEXICMD_UMLAUT, TEXICMD_UNNUMBERED, TEXICMD_UNNUMBEREDSEC, TEXICMD_UREF, TEXICMD_URL, TEXICMD_VAR, + TEXICMD_VERBATIMINCLUDE, TEXICMD_VSKIP, TEXICMD_W, TEXICMD_XREF, @@ -240,6 +247,7 @@ struct texi { #define ismspace(_x) \ (isws((_x)) || '\n' == (_x)) +static void doaccent(struct texi *, enum texicmd, const char *, size_t, size_t *); static void doblock(struct texi *, enum texicmd, const char *, size_t, size_t *); static void dobracket(struct texi *, enum texicmd, const char *, size_t, size_t *); static void dobye(struct texi *, enum texicmd, const char *, size_t, size_t *); @@ -266,9 +274,11 @@ static void dosp(struct texi *, enum texicmd, const ch static void dosubsection(struct texi *, enum texicmd, const char *, size_t, size_t *); static void dosymbol(struct texi *, enum texicmd, const char *, size_t, size_t *); static void dotitle(struct texi *, enum texicmd, const char *, size_t, size_t *); +static void doverbinclude(struct texi *, enum texicmd, const char *, size_t, size_t *); static const struct texitok texitoks[TEXICMD__MAX] = { { doignargn, "acronym", 7 }, /* TEXICMD_ACRONYM */ + { doaccent, "'", 1 }, /* TEXICMD_ACUTE */ { doignline, "afourpaper", 10 }, /* TEXICMD_A4PAPER */ { doignbracket, "anchor", 6 }, /* TEXICMD_ANCHOR */ { dosection, "appendix", 8 }, /* TEXICMD_APPENDIX */ @@ -283,6 +293,7 @@ static const struct texitok texitoks[TEXICMD__MAX] = { { doignline, "center", 6 }, /* TEXICMD_CENTER */ { dosection, "chapter", 7 }, /* TEXICMD_CHAPTER */ { doignline, "cindex", 6 }, /* TEXICMD_CINDEX */ + { doaccent, "^", 1 }, /* TEXICMD_CIRCUMFLEX */ { dofont, "code", 4 }, /* TEXICMD_CODE */ { dofont, "cite", 4 }, /* TEXICMD_CITE */ { dosymbol, ":", 1 }, /* TEXICMD_COLON */ @@ -326,6 +337,7 @@ static const struct texitok texitoks[TEXICMD__MAX] = { { dosymbol, "error", 5 }, /* TEXICMD_ERROR */ { doexample, "example", 7 }, /* TEXICMD_EXAMPLE */ { doinline, "file", 4 }, /* TEXICMD_FILE */ + { doaccent, "`", 1 }, /* TEXICMD_GRAVE */ { doblock, "group", 5 }, /* TEXICMD_GROUP */ { dosection, "heading", 7 }, /* TEXICMD_HEADING */ { doignline, "headings", 8 }, /* TEXICMD_HEADINGS */ @@ -359,6 +371,7 @@ static const struct texitok texitoks[TEXICMD__MAX] = { { dosymbol, "\n", 1 }, /* TEXICMD_NEWLINE */ { doignline, "node", 4 }, /* TEXICMD_NODE */ { doignline, "noindent", 8 }, /* TEXICMD_NOINDENT */ + { doinline, "option", 6 }, /* TEXICMD_OPTION */ { dolink, "pxref", 5 }, /* TEXICMD_PXREF */ { dosymbol, "?", 1 }, /* TEXICMD_QUESTIONMARK */ { doquotation, "quotation", 9 }, /* TEXICMD_QUOTATION */ @@ -392,15 +405,18 @@ static const struct texitok texitoks[TEXICMD__MAX] = { { dotable, "table", 5 }, /* TEXICMD_TABLE */ { doignblock, "tex", 3 }, /* TEXICMD_TEX */ { dosymbol, "TeX", 3 }, /* TEXICMD_TEXSYM */ + { doaccent, "~", 1 }, /* TEXICMD_TILDE */ { doignline, "title", 5 }, /* TEXICMD_TITLE */ { dobracket, "titlefont", 9 }, /* TEXICMD_TITLEFONT */ { doignblock, "titlepage", 9 }, /* TEXICMD_TITLEPAGE */ { dotop, "top", 3 }, /* TEXICMD_TOP */ + { doaccent, "\"", 1 }, /* TEXICMD_UMLAUT */ { dosection, "unnumbered", 10 }, /* TEXICMD_UNNUMBERED */ { dosection, "unnumberedsec", 13 }, /* TEXICMD_UNNUMBEREDSEC */ { dolink, "uref", 4 }, /* TEXICMD_UREF */ { dolink, "url", 3 }, /* TEXICMD_URL */ { doinline, "var", 3 }, /* TEXICMD_VAR */ + { doverbinclude, "verbatiminclude", 15 }, /* TEXICMD_VERBATIMINCLUDE */ { dosp, "vskip", 5 }, /* TEXICMD_VSKIP */ { dobracket, "w", 1 }, /* TEXICMD_W */ { dolink, "xref", 4 }, /* TEXICMD_XREF */ @@ -1069,11 +1085,12 @@ parseto(struct texi *p, const char *buf, * This can be called in a nested context. */ static void -parsefile(struct texi *p, const char *fname) +parsefile(struct texi *p, const char *fname, int parse) { - struct texifile *f; - int fd; - struct stat st; + struct texifile *f; + int fd; + struct stat st; + size_t i; assert(p->filepos < 64); f = &p->files[p->filepos]; @@ -1096,7 +1113,23 @@ parsefile(struct texi *p, const char *fname) texiabort(p, fname); p->filepos++; - parseeof(p, f->map, f->mapsz); + if ( ! parse) { + /* + * We're printing verbatim output. + * Make sure it doesn't get interpreted as mdoc by + * escaping escapes and making sure leading dots don't + * trigger mdoc(7) expansion. + */ + for (i = 0; i < f->mapsz; i++) { + if (i > 0 && '.' == f->map[i]) + if ('\n' == f->map[i - 1]) + fputs("\\&", stdout); + putchar(f->map[i]); + if ('\\' == f->map[i]) + putchar('e'); + } + } else + parseeof(p, f->map, f->mapsz); texifilepop(p); } @@ -1257,6 +1290,9 @@ doinline(struct texi *p, enum texicmd cmd, case (TEXICMD_FILE): macro = "Pa"; break; + case (TEXICMD_OPTION): + macro = "Op"; + break; case (TEXICMD_VAR): macro = "Va"; break; @@ -1277,6 +1313,46 @@ doinline(struct texi *p, enum texicmd cmd, } static void +doverbinclude(struct texi *p, enum texicmd cmd, + const char *buf, size_t sz, size_t *pos) +{ + char fname[PATH_MAX], path[PATH_MAX]; + int rc; + size_t i; + + while (*pos < sz && ' ' == buf[*pos]) + advance(p, buf, pos); + + /* Read in the filename. */ + for (i = 0; *pos < sz && '\n' != buf[*pos]; i++) { + if (i == sizeof(fname) - 1) + break; + fname[i] = buf[*pos]; + advance(p, buf, pos); + } + + if (i == 0) + texierr(p, "path too short"); + else if ('\n' != buf[*pos]) + texierr(p, "path too long"); + else if ('/' == fname[0]) + texierr(p, "no absolute paths"); + fname[i] = '\0'; + + if (strstr(fname, "../") || strstr(fname, "/..")) + texierr(p, "insecure path"); + + rc = snprintf(path, sizeof(path), + "%s/%s", p->dirs[0], fname); + if (rc < 0) + texierr(p, "couldn't format path"); + else if ((size_t)rc >= sizeof(path)) + texierr(p, "path too long"); + + parsefile(p, path, 0); +} + +static void doinclude(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos) { @@ -1316,7 +1392,7 @@ doinclude(struct texi *p, enum texicmd cmd, else if (-1 == access(path, R_OK)) continue; - parsefile(p, path); + parsefile(p, path, 1); return; } @@ -1420,6 +1496,89 @@ dotitle(struct texi *p, enum texicmd cmd, } static void +doaccent(struct texi *p, enum texicmd cmd, + const char *buf, size_t sz, size_t *pos) +{ + + if (*pos == sz) + return; + advance(p, buf, pos); + switch (cmd) { + case (TEXICMD_ACUTE): + switch (buf[*pos]) { + case ('a'): case ('A'): + case ('e'): case ('E'): + case ('i'): case ('I'): + case ('o'): case ('O'): + case ('u'): case ('U'): + texiputchars(p, "\\(\'"); + texiputchar(p, buf[*pos]); + break; + default: + texiputchar(p, buf[*pos]); + } + break; + case (TEXICMD_CIRCUMFLEX): + switch (buf[*pos]) { + case ('a'): case ('A'): + case ('e'): case ('E'): + case ('i'): case ('I'): + case ('o'): case ('O'): + case ('u'): case ('U'): + texiputchars(p, "\\(^"); + texiputchar(p, buf[*pos]); + break; + default: + texiputchar(p, buf[*pos]); + } + break; + case (TEXICMD_GRAVE): + switch (buf[*pos]) { + case ('a'): case ('A'): + case ('e'): case ('E'): + case ('i'): case ('I'): + case ('o'): case ('O'): + case ('u'): case ('U'): + texiputchars(p, "\\(`"); + texiputchar(p, buf[*pos]); + break; + default: + texiputchar(p, buf[*pos]); + } + break; + case (TEXICMD_TILDE): + switch (buf[*pos]) { + case ('a'): case ('A'): + case ('n'): case ('N'): + case ('o'): case ('O'): + texiputchars(p, "\\(~"); + texiputchar(p, buf[*pos]); + break; + default: + texiputchar(p, buf[*pos]); + } + break; + case (TEXICMD_UMLAUT): + switch (buf[*pos]) { + case ('a'): case ('A'): + case ('e'): case ('E'): + case ('i'): case ('I'): + case ('o'): case ('O'): + case ('u'): case ('U'): + case ('y'): + texiputchars(p, "\\(:"); + texiputchar(p, buf[*pos]); + break; + default: + texiputchar(p, buf[*pos]); + } + break; + default: + abort(); + } +} + +static void dosymbol(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos) { @@ -1869,7 +2028,7 @@ main(int argc, char *argv[]) texi.ign = 1; texi.dirs = parsedirs(dir, Idir, &texi.dirsz); - parsefile(&texi, argv[0]); + parsefile(&texi, argv[0], 1); /* We shouldn't get here. */ texiexit(&texi); return(EXIT_FAILURE);