=================================================================== RCS file: /cvs/texi2mdoc/main.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -p -r1.1 -r1.2 --- texi2mdoc/main.c 2015/02/16 22:24:43 1.1 +++ texi2mdoc/main.c 2015/02/17 10:27:18 1.2 @@ -1,4 +1,4 @@ -/* $Id: main.c,v 1.1 2015/02/16 22:24:43 kristaps Exp $ */ +/* $Id: main.c,v 1.2 2015/02/17 10:27:18 kristaps Exp $ */ /* * Copyright (c) 2015 Kristaps Dzonsons * @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include #include @@ -32,8 +34,11 @@ * different story. */ enum texicmd { + TEXICMD_ACRONYM, TEXICMD_A4PAPER, TEXICMD_ANCHOR, + TEXICMD_APPENDIX, + TEXICMD_APPENDIXSEC, TEXICMD_AT, TEXICMD_BYE, TEXICMD_CHAPTER, @@ -41,22 +46,27 @@ enum texicmd { TEXICMD_CODE, TEXICMD_COMMAND, TEXICMD_COMMENT, + TEXICMD_COMMENT_LONG, TEXICMD_CONTENTS, TEXICMD_COPYING, TEXICMD_COPYRIGHT, TEXICMD_DETAILMENU, TEXICMD_DIRCATEGORY, TEXICMD_DIRENTRY, + TEXICMD_DOTS, TEXICMD_EMAIL, TEXICMD_EMPH, TEXICMD_END, + TEXICMD_ENUMERATE, TEXICMD_EXAMPLE, TEXICMD_FILE, + TEXICMD_HEADING, TEXICMD_I, TEXICMD_IFHTML, TEXICMD_IFNOTTEX, TEXICMD_IFTEX, TEXICMD_IMAGE, + TEXICMD_INCLUDE, TEXICMD_ITEM, TEXICMD_ITEMIZE, TEXICMD_KBD, @@ -65,6 +75,7 @@ enum texicmd { TEXICMD_NODE, TEXICMD_QUOTATION, TEXICMD_PARINDENT, + TEXICMD_PRINTINDEX, TEXICMD_REF, TEXICMD_SAMP, TEXICMD_SECTION, @@ -79,6 +90,7 @@ enum texicmd { TEXICMD_TITLEPAGE, TEXICMD_TOP, TEXICMD_UNNUMBERED, + TEXICMD_UNNUMBEREDSEC, TEXICMD_URL, TEXICMD_VAR, TEXICMD__MAX @@ -98,6 +110,9 @@ struct texifile { struct texi; +/* + * Callback for functions implementing texi commands. + */ typedef void (*texicmdfp)(struct texi *, enum texicmd, const char *, size_t, size_t *); @@ -124,24 +139,30 @@ struct texi { size_t outcol; /* column of output */ int outmacro; /* whether output is in line macro */ int seenws; /* whitespace has been ignored */ + char *dir; /* texi directory */ }; +/* FIXME: don't use this crap. */ #define ismpunct(_x) \ ('.' == (_x) || \ ',' == (_x) || \ ';' == (_x)) +#define isws(_x) \ + (' ' == (_x) || '\t' == (_x)) static void doarg1(struct texi *, enum texicmd, const char *, size_t, size_t *); static void dobracket(struct texi *, enum texicmd, const char *, size_t, size_t *); static void dobye(struct texi *, enum texicmd, const char *, size_t, size_t *); static void docommand(struct texi *, enum texicmd, const char *, size_t, size_t *); static void doemph(struct texi *, enum texicmd, const char *, size_t, size_t *); +static void doenumerate(struct texi *, enum texicmd, const char *, size_t, size_t *); static void doexample(struct texi *, enum texicmd, const char *, size_t, size_t *); static void dofile(struct texi *, enum texicmd, const char *, size_t, size_t *); static void doifnottex(struct texi *, enum texicmd, const char *, size_t, size_t *); static void doignblock(struct texi *, enum texicmd, const char *, size_t, size_t *); static void doignbracket(struct texi *, enum texicmd, const char *, size_t, size_t *); static void doignline(struct texi *, enum texicmd, const char *, size_t, size_t *); +static void doinclude(struct texi *, enum texicmd, const char *, size_t, size_t *); static void doitalic(struct texi *, enum texicmd, const char *, size_t, size_t *); static void doitem(struct texi *, enum texicmd, const char *, size_t, size_t *); static void doitemize(struct texi *, enum texicmd, const char *, size_t, size_t *); @@ -155,8 +176,11 @@ static void dosubsection(struct texi *, enum texicmd, static void dosymbol(struct texi *, enum texicmd, const char *, size_t, size_t *); static const struct texitok texitoks[TEXICMD__MAX] = { + { doarg1, "acronym", 7 }, /* TEXICMD_ACRONYM */ { doignline, "afourpaper", 10 }, /* TEXICMD_A4PAPER */ { doignbracket, "anchor", 6 }, /* TEXICMD_ANCHOR */ + { dosh, "appendix", 8 }, /* TEXICMD_APPENDIX */ + { dosh, "appendixsec", 11 }, /* TEXICMD_APPENDIXSEC */ { dosymbol, "@", 1 }, /* TEXICMD_AT */ { dobye, "bye", 3 }, /* TEXICMD_BYE */ { dosh, "chapter", 7 }, /* TEXICMD_CHAPTER */ @@ -164,22 +188,27 @@ static const struct texitok texitoks[TEXICMD__MAX] = { { doliteral, "code", 4 }, /* TEXICMD_CODE */ { docommand, "command", 7 }, /* TEXICMD_COMMAND */ { doignline, "c", 1 }, /* TEXICMD_COMMENT */ + { doignline, "comment", 7 }, /* TEXICMD_COMMENT_LONG */ { doignline, "contents", 8 }, /* TEXICMD_CONTENTS */ { doignblock, "copying", 7 }, /* TEXICMD_COPYING */ { dosymbol, "copyright", 9 }, /* TEXICMD_COPYRIGHT */ { doignblock, "detailmenu", 10 }, /* TEXICMD_DETAILMENU */ { doignline, "dircategory", 11 }, /* TEXICMD_DIRCATEGORY */ { doignblock, "direntry", 8 }, /* TEXICMD_DIRENTRY */ + { dosymbol, "dots", 4 }, /* TEXICMD_DOTS */ { doarg1, "email", 5 }, /* TEXICMD_EMAIL */ { doemph, "emph", 4 }, /* TEXICMD_EMPH */ { NULL, "end", 3 }, /* TEXICMD_END */ + { doenumerate, "enumerate", 9 }, /* TEXICMD_ENUMERATE */ { doexample, "example", 7 }, /* TEXICMD_EXAMPLE */ { dofile, "file", 4 }, /* TEXICMD_FILE */ + { dosection, "heading", 7 }, /* TEXICMD_HEADING */ { doitalic, "i", 1 }, /* TEXICMD_I */ { doignblock, "ifhtml", 6 }, /* TEXICMD_IFHTML */ { doifnottex, "ifnottex", 8 }, /* TEXICMD_IFNOTTEX */ { doignblock, "iftex", 5 }, /* TEXICMD_IFTEX */ { doignbracket, "image", 5 }, /* TEXICMD_IMAGE */ + { doinclude, "include", 7 }, /* TEXICMD_INCLUDE */ { doitem, "item", 4 }, /* TEXICMD_ITEM */ { doitemize, "itemize", 7 }, /* TEXICMD_ITEMIZE */ { doliteral, "kbd", 3 }, /* TEXICMD_KBD */ @@ -187,6 +216,7 @@ static const struct texitok texitoks[TEXICMD__MAX] = { { doignblock, "menu", 4 }, /* TEXICMD_MENU */ { doignline, "node", 4 }, /* TEXICMD_NODE */ { doquotation, "quotation", 9 }, /* TEXICMD_QUOTATION */ + { doignline, "printindex", 10 }, /* TEXICMD_PRINTINDEX */ { doignline, "paragraphindent", 14 }, /* TEXICMD_PARINDENT */ { dobracket, "ref", 3 }, /* TEXICMD_REF */ { doliteral, "samp", 4 }, /* TEXICMD_SAMP */ @@ -202,10 +232,14 @@ static const struct texitok texitoks[TEXICMD__MAX] = { { doignblock, "titlepage", 9 }, /* TEXICMD_TITLEPAGE */ { dotop, "top", 3 }, /* TEXICMD_TOP */ { dosh, "unnumbered", 10 }, /* TEXICMD_UNNUMBERED */ + { dosection, "unnumberedsec", 13 }, /* TEXICMD_UNNUMBEREDSEC */ { doarg1, "url", 3 }, /* TEXICMD_URL */ { doliteral, "var", 3 }, /* TEXICMD_VAR */ }; +/* + * Unmap the top-most file that we're using. + */ static void texifilepop(struct texi *p) { @@ -216,16 +250,25 @@ texifilepop(struct texi *p) munmap(f->map, f->mapsz); } +/* + * Unmap all files that we're currently using. + * The utility should exit(...) after this is called. + */ static void texiexit(struct texi *p) { while (p->filepos > 0) texifilepop(p); + free(p->dir); } +/* + * Fatal error: unmap all files and exit. + * The "errstring" is passed to perror(3). + */ static void -texifatal(struct texi *p, const char *errstring) +texiabort(struct texi *p, const char *errstring) { perror(errstring); @@ -242,7 +285,7 @@ texiwarn(const struct texi *p, const char *fmt, ...) { va_list ap; - fprintf(stderr, "%s:%zu:%zu: ", + fprintf(stderr, "%s:%zu:%zu: warning: ", p->files[p->filepos - 1].name, p->files[p->filepos - 1].line + 1, p->files[p->filepos - 1].col + 1); @@ -252,6 +295,23 @@ texiwarn(const struct texi *p, const char *fmt, ...) fputc('\n', stderr); } +static void +texierr(struct texi *p, const char *fmt, ...) +{ + va_list ap; + + fprintf(stderr, "%s:%zu:%zu: error: ", + p->files[p->filepos - 1].name, + p->files[p->filepos - 1].line + 1, + p->files[p->filepos - 1].col + 1); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fputc('\n', stderr); + texiexit(p); + exit(EXIT_FAILURE); +} + /* * Put a single data character. * This MUST NOT be a mdoc(7) command: it should be free text that's @@ -442,7 +502,7 @@ texicmd(struct texi *p, const char *buf, assert('@' == buf[pos]); for (*end = ++pos; *end < sz && ! isspace(buf[*end]); (*end)++) - if ('@' == buf[*end] || '{' == buf[*end]) + if ((*end > pos && '@' == buf[*end]) || '{' == buf[*end]) break; len = *end - pos; @@ -556,7 +616,7 @@ parseto(struct texi *p, const char *buf, cmd = texicmd(p, buf, *pos, sz, &end); advanceto(p, buf, pos, end); if (TEXICMD_END == cmd) { - while (*pos < sz && ' ' == buf[*pos]) + while (*pos < sz && isws(buf[*pos])) advance(p, buf, pos); /* * FIXME: skip tabs and also check the full @@ -577,6 +637,38 @@ parseto(struct texi *p, const char *buf, } static void +parsefile(struct texi *p, const char *fname) +{ + struct texifile *f; + int fd; + struct stat st; + + assert(p->filepos < 64); + f = &p->files[p->filepos]; + memset(f, 0, sizeof(struct texifile)); + + f->name = fname; + if (-1 == (fd = open(fname, O_RDONLY, 0))) { + texiabort(p, fname); + } else if (-1 == fstat(fd, &st)) { + close(fd); + texiabort(p, fname); + } + + f->mapsz = st.st_size; + f->map = mmap(NULL, f->mapsz, + PROT_READ, MAP_SHARED, fd, 0); + close(fd); + + if (MAP_FAILED == f->map) + texiabort(p, fname); + + p->filepos++; + parseeof(p, f->map, f->mapsz); + texifilepop(p); +} + +static void doignblock(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos) { @@ -649,6 +741,47 @@ doinline(struct texi *p, const char *buf, } static void +doinclude(struct texi *p, enum texicmd cmd, + const char *buf, size_t sz, size_t *pos) +{ + char fname[PATH_MAX], path[PATH_MAX]; + size_t i; + int rc; + + while (*pos < sz && ' ' == buf[*pos]) + advance(p, buf, pos); + + /* Read in the filename. */ + for (i = 0; *pos < sz && '\n' != buf[*pos]; i++) { + if (i == sizeof(fname) - 1) + break; + fname[i] = buf[*pos]; + advance(p, buf, pos); + } + + if (i == 0) + texierr(p, "path too short"); + else if ('\n' != buf[*pos]) + texierr(p, "path too long"); + else if ('/' == fname[0]) + texierr(p, "no absolute paths"); + fname[i] = '\0'; + + if (strstr(fname, "../") || strstr(fname, "/..")) + texierr(p, "insecure path"); + + /* Append filename to original name's directory. */ + rc = snprintf(path, sizeof(path), "%s/%s", p->dir, fname); + if (rc < 0) + texierr(p, "couldn't format filename"); + else if ((size_t)rc >= sizeof(path)) + texierr(p, "path too long"); + + /* Pump through to parser. */ + parsefile(p, path); +} + +static void doitalic(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos) { @@ -745,6 +878,9 @@ dosymbol(struct texi *p, enum texicmd cmd, case (TEXICMD_COPYRIGHT): texiputchars(p, "\\(co"); break; + case (TEXICMD_DOTS): + texiputchars(p, "..."); + break; case (TEXICMD_LATEX): texiputchars(p, "LaTeX"); break; @@ -835,7 +971,7 @@ dosection(struct texi *p, enum texicmd cmd, advanceeoln(p, buf, sz, pos); return; } - while (*pos < sz && ' ' == buf[*pos]) + while (*pos < sz && isws(buf[*pos]) ) advance(p, buf, pos); texifputs(p, ".Ss "); while (*pos < sz && '\n' != buf[*pos]) { @@ -854,7 +990,7 @@ dosh(struct texi *p, enum texicmd cmd, advanceeoln(p, buf, sz, pos); return; } - while (*pos < sz && ' ' == buf[*pos]) + while (*pos < sz && isws(buf[*pos])) advance(p, buf, pos); texifputs(p, ".Sh "); while (*pos < sz && '\n' != buf[*pos]) { @@ -871,7 +1007,7 @@ dotop(struct texi *p, enum texicmd cmd, p->flags &= ~TEXI_HEADER; advanceeoln(p, buf, sz, pos); - teximacro(p, ".Dd $Mdocdate: February 16 2015 $"); + teximacro(p, ".Dd $Mdocdate: February 17 2015 $"); teximacro(p, ".Dt SOMETHING 7"); teximacro(p, ".Os"); teximacro(p, ".Sh NAME"); @@ -887,7 +1023,7 @@ doitem(struct texi *p, enum texicmd cmd, /* See if we have arguments... */ for (end = *pos; end < sz; end++) - if (' ' != buf[end] && '\t' != buf[end]) + if ( ! isws(buf[end])) break; /* If we have arguments, print them too. */ @@ -914,6 +1050,16 @@ dotable(struct texi *p, enum texicmd cmd, } static void +doenumerate(struct texi *p, enum texicmd cmd, + const char *buf, size_t sz, size_t *pos) +{ + + teximacro(p, ".Bl -enum"); + parseto(p, buf, sz, pos, "enumerate"); + teximacro(p, ".El"); +} + +static void doitemize(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos) { @@ -944,46 +1090,12 @@ doignline(struct texi *p, enum texicmd cmd, advance(p, buf, pos); } -static int -parsefile(struct texi *p, const char *fname) -{ - struct texifile *f; - int fd; - struct stat st; - - assert(p->filepos < 64); - f = &p->files[p->filepos]; - memset(f, 0, sizeof(struct texifile)); - - f->name = fname; - if (-1 == (fd = open(fname, O_RDONLY, 0))) { - texifatal(p, fname); - } else if (-1 == fstat(fd, &st)) { - close(fd); - texifatal(p, fname); - } - - f->mapsz = st.st_size; - f->map = mmap(NULL, f->mapsz, - PROT_READ, MAP_SHARED, fd, 0); - close(fd); - - if (MAP_FAILED == f->map) { - texifatal(p, fname); - return(0); - } - - p->filepos++; - parseeof(p, f->map, f->mapsz); - texifilepop(p); - return(1); -} - int main(int argc, char *argv[]) { struct texi texi; - int c, rc; + int c; + char *path, *dir; const char *progname; progname = strrchr(argv[0], '/'); @@ -1002,11 +1114,22 @@ main(int argc, char *argv[]) if (0 == (argc -= optind)) goto usage; + if (NULL == (path = strdup(argv[0]))) { + perror(NULL); + exit(EXIT_FAILURE); + } else if (NULL == (dir = dirname(path))) { + perror(argv[0]); + free(path); + exit(EXIT_FAILURE); + } + free(path); + memset(&texi, 0, sizeof(struct texi)); texi.flags = TEXI_HEADER; - rc = parsefile(&texi, argv[0]); - return(rc ? EXIT_SUCCESS : EXIT_FAILURE); - + texi.dir = strdup(dir); + parsefile(&texi, argv[0]); + texiexit(&texi); + return(EXIT_FAILURE); usage: fprintf(stderr, "usage: %s file\n", progname); return(EXIT_FAILURE);