=================================================================== RCS file: /cvs/texi2mdoc/main.c,v retrieving revision 1.68 retrieving revision 1.73 diff -u -p -r1.68 -r1.73 --- texi2mdoc/main.c 2015/03/12 10:44:34 1.68 +++ texi2mdoc/main.c 2018/11/13 10:19:16 1.73 @@ -1,4 +1,4 @@ -/* $Id: main.c,v 1.68 2015/03/12 10:44:34 kristaps Exp $ */ +/* $Id: main.c,v 1.73 2018/11/13 10:19:16 schwarze Exp $ */ /* * Copyright (c) 2015 Kristaps Dzonsons * @@ -17,6 +17,9 @@ #if defined(__linux__) || defined(__MINT__) # define _GNU_SOURCE /* memmem */ #endif + +#include + #include #include #include @@ -26,6 +29,7 @@ #include #include #include +#include #include #include "extern.h" @@ -376,7 +380,7 @@ dodefindex(struct texi *p, enum texicmd cmd, size_t *p if (*pos == BUFSZ(p)) { texiwarn(p, "unexpected EOF"); return; - } + } if (0 == *pos - start) texiwarn(p, "zero-length index definition"); else @@ -464,7 +468,7 @@ dodefn(struct texi *p, enum texicmd cmd, size_t *pos) case (TEXICMD_DEFMAC): case (TEXICMD_DEFMACX): teximacroopen(p, "Dv"); - while (parselinearg(p, pos)) + while (parselinearg(p, pos)) /* Spin. */ ; teximacroclose(p); break; @@ -476,7 +480,7 @@ dodefn(struct texi *p, enum texicmd cmd, size_t *pos) parselinearg(p, pos); teximacroclose(p); teximacroopen(p, "Fa"); - while (parselinearg(p, pos)) + while (parselinearg(p, pos)) /* Spin. */ ; teximacroclose(p); teximacro(p, "Fc"); @@ -496,7 +500,7 @@ dodefn(struct texi *p, enum texicmd cmd, size_t *pos) parselinearg(p, pos); teximacroclose(p); teximacroopen(p, "Fa"); - while (parselinearg(p, pos)) + while (parselinearg(p, pos)) /* Spin. */ ; teximacroclose(p); teximacro(p, "Fc"); @@ -508,7 +512,7 @@ dodefn(struct texi *p, enum texicmd cmd, size_t *pos) case (TEXICMD_DEFTYPEVR): case (TEXICMD_DEFTYPEVRX): teximacroopen(p, "Vt"); - while (parselinearg(p, pos)) + while (parselinearg(p, pos)) /* Spin. */ ; teximacroclose(p); break; @@ -519,7 +523,7 @@ dodefn(struct texi *p, enum texicmd cmd, size_t *pos) case (TEXICMD_DEFVR): case (TEXICMD_DEFVRX): teximacroopen(p, "Va"); - while (parselinearg(p, pos)) + while (parselinearg(p, pos)) /* Spin. */ ; teximacroclose(p); break; @@ -607,9 +611,17 @@ domacro(struct texi *p, enum texicmd cmd, size_t *pos) m.key[end - start] = '\0'; m.args = argparse(p, pos, &m.argsz, 0); + if (*pos == BUFSZ(p)) { + texiwarn(p, "unexpected EOF"); + return; + } /* Note: we advance to the beginning of the macro. */ advanceeoln(p, pos, 1); + if ((start = *pos) == BUFSZ(p)) { + texiwarn(p, "unexpected EOF"); + return; + } /* * According to the Texinfo manual, the macro ends on the @@ -622,7 +634,6 @@ domacro(struct texi *p, enum texicmd cmd, size_t *pos) * @end macro without the leading newline else we might look * past empty macros. */ - start = *pos; endtok = "@end macro\n"; endtoksz = strlen(endtok); blk = memmem(&BUF(p)[start], BUFSZ(p) - start, endtok, endtoksz); @@ -632,7 +643,7 @@ domacro(struct texi *p, enum texicmd cmd, size_t *pos) while (&BUF(p)[*pos] != blk) advance(p, pos); assert('@' == BUF(p)[*pos]); - if ('\n' != BUF(p)[*pos - 1]) + if ('\n' != BUF(p)[*pos - 1]) texierr(p, "cannot handle @end macro in-line"); len = blk - &BUF(p)[start]; @@ -644,7 +655,7 @@ domacro(struct texi *p, enum texicmd cmd, size_t *pos) p->macros = realloc (p->macros, - (p->macrosz + 1) * + (p->macrosz + 1) * sizeof(struct teximacro)); if (NULL == p->macros) texiabort(p, NULL); @@ -660,24 +671,24 @@ doignblock(struct texi *p, enum texicmd cmd, size_t *p const char *endt, *startt; size_t esz, ssz, newpos, stack; - /* + /* * FIXME: this is cheating. * These tokens are supposed to begin on a newline. * However, if we do that, then we would need to check within * the loop for trailer (or leading, as the case may be) * newline, and that's just a bit too complicated right now. * This is becasue - * @ifset BAR - * @ifset FOO - * @end ifset - * @end ifset + * @ifset BAR + * @ifset FOO + * @end ifset + * @end ifset * won't work right now: we'd read after the first "@end ifset" * to the next line, then look for the next line after that. */ - ssz = snprintf(start, sizeof(start), + ssz = snprintf(start, sizeof(start), "@%s", texitoks[cmd].tok); assert(ssz < sizeof(start)); - esz = snprintf(end, sizeof(end), + esz = snprintf(end, sizeof(end), "@end %s", texitoks[cmd].tok); assert(esz < sizeof(end)); stack = 1; @@ -699,7 +710,7 @@ doignblock(struct texi *p, enum texicmd cmd, size_t *p "block", texitoks[cmd].tok); *pos = BUFSZ(p); break; - } + } newpos = *pos; if (NULL == startt || startt > endt) { @@ -719,7 +730,7 @@ doignblock(struct texi *p, enum texicmd cmd, size_t *p static void doblock(struct texi *p, enum texicmd cmd, size_t *pos) { - + parseto(p, pos, texitoks[cmd].tok); } @@ -775,9 +786,9 @@ doinline(struct texi *p, enum texicmd cmd, size_t *pos return; } - /* + /* * If we haven't seen any whitespace, then we don't want the - * subsequent macro to insert any whitespace. + * subsequent macro to insert any whitespace. */ if (p->outmacro && 0 == p->seenws) { teximacroopen(p, "Ns"); @@ -822,7 +833,7 @@ doverb(struct texi *p, enum texicmd cmd, size_t *pos) start = *pos; /* Read until we see the delimiter then end-brace. */ while (*pos < BUFSZ(p) - 1) { - if (BUF(p)[*pos] == delim && BUF(p)[*pos + 1] == '}') + if (BUF(p)[*pos] == delim && BUF(p)[*pos + 1] == '}') break; advance(p, pos); } @@ -934,9 +945,9 @@ doverbatim(struct texi *p, enum texicmd cmd, size_t *p static void doverbinclude(struct texi *p, enum texicmd cmd, size_t *pos) { - char fname[PATH_MAX], path[PATH_MAX]; - int rc; - size_t i, end; + char fname[PATH_MAX], path[PATH_MAX]; + int rc; + size_t i, end; const char *v; enum texicmd type; @@ -953,7 +964,7 @@ doverbinclude(struct texi *p, enum texicmd cmd, size_t } type = texicmd(p, *pos, &end, NULL); advanceto(p, pos, end); - if (TEXICMD_VALUE != type) + if (TEXICMD_VALUE != type) texierr(p, "unknown verbatiminclude command"); v = valueblookup(p, pos); if (NULL == v) @@ -978,9 +989,9 @@ doverbinclude(struct texi *p, enum texicmd cmd, size_t if (strstr(fname, "../") || strstr(fname, "/..")) texierr(p, "insecure path"); - rc = snprintf(path, sizeof(path), + rc = snprintf(path, sizeof(path), "%s/%s", p->dirs[0], fname); - if (rc < 0) + if (rc < 0) texierr(p, "couldn't format path"); else if ((size_t)rc >= sizeof(path)) texierr(p, "path too long"); @@ -991,9 +1002,9 @@ doverbinclude(struct texi *p, enum texicmd cmd, size_t static void doinclude(struct texi *p, enum texicmd cmd, size_t *pos) { - char fname[PATH_MAX], path[PATH_MAX]; - size_t i, end; - int rc; + char fname[PATH_MAX], path[PATH_MAX]; + size_t i, end; + int rc; const char *v; enum texicmd type; @@ -1011,7 +1022,7 @@ doinclude(struct texi *p, enum texicmd cmd, size_t *po } type = texicmd(p, *pos, &end, NULL); advanceto(p, pos, end); - if (TEXICMD_VALUE != type) + if (TEXICMD_VALUE != type) texierr(p, "unknown include command"); v = valueblookup(p, pos); if (NULL == v) @@ -1037,9 +1048,9 @@ doinclude(struct texi *p, enum texicmd cmd, size_t *po texierr(p, "insecure path"); for (i = 0; i < p->dirsz; i++) { - rc = snprintf(path, sizeof(path), + rc = snprintf(path, sizeof(path), "%s/%s", p->dirs[i], fname); - if (rc < 0) + if (rc < 0) texierr(p, "couldn't format path"); else if ((size_t)rc >= sizeof(path)) texierr(p, "path too long"); @@ -1160,7 +1171,7 @@ doaccent(struct texi *p, enum texicmd cmd, size_t *pos if ('{' == BUF(p)[*pos]) { brace = 1; advance(p, pos); - } else if (isalpha((unsigned char)texitoks[cmd].tok[0])) + } else if (isalpha((unsigned char)texitoks[cmd].tok[0])) while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) advance(p, pos); @@ -1278,7 +1289,7 @@ doaccent(struct texi *p, enum texicmd cmd, size_t *pos } if (*pos < BUFSZ(p)) advance(p, pos); - } + } switch (cmd) { case (TEXICMD_TIEACCENT): @@ -1492,7 +1503,7 @@ dosymbol(struct texi *p, enum texicmd cmd, size_t *pos static void doquotation(struct texi *p, enum texicmd cmd, size_t *pos) { - + teximacro(p, "Qo"); parseto(p, pos, "quotation"); teximacro(p, "Qc"); @@ -1509,12 +1520,16 @@ indexcmp(const void *p1, const void *p2) static void doprintindex(struct texi *p, enum texicmd cmd, size_t *pos) { - size_t i, j, start, end, len; + static size_t guard = 0; + size_t i, j, start, end, len; #if HAVE_INDEX - char *cp; - char buf[PATH_MAX]; + char *cp; + char buf[PATH_MAX]; #endif + if (guard++ > 8) + texierr(p, "recursive @printindex"); + while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) advance(p, pos); start = *pos; @@ -1522,12 +1537,18 @@ doprintindex(struct texi *p, enum texicmd cmd, size_t advance(p, pos); if ((end = *pos) == BUFSZ(p)) { texiwarn(p, "unexpected EOF"); + guard--; return; } advance(p, pos); - if (0 == (len = end - start)) { + if (*pos == BUFSZ(p)) { + texiwarn(p, "unexpected EOF"); + guard--; + return; + } else if (0 == (len = end - start)) { texiwarn(p, "zero-length index"); + guard--; return; } @@ -1542,13 +1563,16 @@ doprintindex(struct texi *p, enum texicmd cmd, size_t if (i == p->indexsz) { texiwarn(p, "cannot find index"); + guard--; return; - } else if (0 == p->indexs[i].indexsz) + } else if (0 == p->indexs[i].indexsz) { + guard--; return; + } /* Alphabetically sort our indices. */ - qsort(p->indexs[i].index, - p->indexs[i].indexsz, + qsort(p->indexs[i].index, + p->indexs[i].indexsz, sizeof(struct texiterm), indexcmp); texivspace(p); @@ -1578,7 +1602,7 @@ doprintindex(struct texi *p, enum texicmd cmd, size_t p->literal++; } #endif - texisplice(p, p->indexs[i].index[j].term, + texisplice(p, p->indexs[i].index[j].term, strlen(p->indexs[i].index[j].term), *pos); parseeoln(p, pos); #if HAVE_INDEX @@ -1591,6 +1615,7 @@ doprintindex(struct texi *p, enum texicmd cmd, size_t p->seenvs = 0; teximacro(p, "El"); texivspace(p); + guard--; } static void @@ -1628,7 +1653,7 @@ donode(struct texi *p, enum texicmd cmd, size_t *pos) p->nodecur = texicache(p, &BUF(p)[start], end - start); if (NULL != p->chapters) { - snprintf(fname, sizeof(fname), + snprintf(fname, sizeof(fname), "%s-%zd.7", p->chapters, p->nodecur); p->outfile = fopen(fname, "w"); if (NULL == p->outfile) @@ -1658,21 +1683,21 @@ domenu(struct texi *p, enum texicmd cmd, size_t *pos) { size_t nodename, entryname; size_t nodenameend, entrynameend, i; - ssize_t ppos, lastppos; + ssize_t ppos, lastppos; char buf[PATH_MAX]; enum texicmd tcmd; advanceeoln(p, pos, 1); - /* - * Parse past initial stuff. + /* + * Parse past initial stuff. * TODO: the manual says we're supposed to make this in bold or * something. */ while (*pos < BUFSZ(p)) { while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) advance(p, pos); - if ('*' != BUF(p)[*pos]) { + if (*pos < BUFSZ(p) && '*' != BUF(p)[*pos]) { if (TEXICMD_END == peeklinecmd(p, *pos)) break; parseeoln(p, pos); @@ -1684,8 +1709,8 @@ domenu(struct texi *p, enum texicmd cmd, size_t *pos) texivspace(p); teximacro(p, "Bl -tag -width Ds -compact"); while (*pos < BUFSZ(p)) { - /* - * Read to next menu item. + /* + * Read to next menu item. * We simply parse every line until we get a magic '*'. * These lines might occur interspersed OR as the * description of an entry. @@ -1695,16 +1720,19 @@ domenu(struct texi *p, enum texicmd cmd, size_t *pos) p->seenws = *pos < BUFSZ(p) && isws(BUF(p)[*pos]); while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) advance(p, pos); - if ('*' != BUF(p)[*pos]) { + if (*pos == BUFSZ(p)) { + texiwarn(p, "unexpected EOF"); + return; + } else if ('*' != BUF(p)[*pos]) { tcmd = peeklinecmd(p, *pos); if (TEXICMD_END == tcmd) break; - else if (TEXICMD_COMMENT == tcmd) + else if (TEXICMD_COMMENT == tcmd) advanceeoln(p, pos, 1); else parseeoln(p, pos); continue; - } + } /* Now we're parsing a menu item. */ advance(p, pos); @@ -1714,6 +1742,10 @@ domenu(struct texi *p, enum texicmd cmd, size_t *pos) while (*pos < BUFSZ(p) && ':' != BUF(p)[*pos]) advance(p, pos); entrynameend = *pos; + if (*pos == BUFSZ(p)) { + texiwarn(p, "unexpected EOF"); + return; + } advance(p, pos); p->seenvs = 0; @@ -1735,7 +1767,7 @@ domenu(struct texi *p, enum texicmd cmd, size_t *pos) if (*pos + 1 == BUFSZ(p)) { advance(p, pos); continue; - } + } if (' ' == BUF(p)[*pos + 1]) { advance(p, pos); break; @@ -1754,7 +1786,7 @@ domenu(struct texi *p, enum texicmd cmd, size_t *pos) nodename = entryname; nodenameend = entrynameend; } - ppos = texicache(p, &BUF(p)[nodename], + ppos = texicache(p, &BUF(p)[nodename], nodenameend - nodename); if (-1 != lastppos) p->nodecache[lastppos].next = ppos; @@ -1773,7 +1805,7 @@ domenu(struct texi *p, enum texicmd cmd, size_t *pos) texiputchars(p, "\""); teximacroclose(p); } else { - snprintf(buf, sizeof(buf), + snprintf(buf, sizeof(buf), "%s-%zd 7 ", p->chapters, ppos); teximacroopen(p, "Xr"); texiputchars(p, buf); @@ -1956,6 +1988,13 @@ dosection(struct texi *p, enum texicmd cmd, size_t *po switch (cmd) { case (TEXICMD_TOP): sec = 0; + if (p->nodesz) + break; + texiwarn(p, "@node Top is missing, assuming it implicitly"); + p->nodesz++; + p->ign--; + p->nodecur = texicache(p, "Top", 3); + teximdocopen(p, pos); break; case (TEXICMD_APPENDIX): case (TEXICMD_CHAPTER): @@ -1991,7 +2030,7 @@ dosection(struct texi *p, enum texicmd cmd, size_t *po if (sec < 2) p->seenvs = -1; - else + else texivspace(p); teximacroopen(p, sects[sec]); @@ -2024,8 +2063,8 @@ doitem(struct texi *p, enum texicmd cmd, size_t *pos) if (p->outcol > 0) texiputchar(p, '\n'); return; - } - + } + if (p->outmacro) texierr(p, "item in open line scope!?"); else if (p->literal) @@ -2073,7 +2112,7 @@ domultitable(struct texi *p, enum texicmd cmd, size_t texivspace(p); p->list = TEXILIST_TABLE; - /* + /* * TS/TE blocks aren't "in mdoc(7)", so we can disregard the * fact that we're in literal mode right now. */ @@ -2088,7 +2127,7 @@ domultitable(struct texi *p, enum texicmd cmd, size_t /* Make sure we don't print anything when scanning. */ p->ign++; if (*pos < BUFSZ(p) && '@' == BUF(p)[*pos]) { - /* + /* * Look for @columnfractions. * We ignore these, but we do use the number of * arguments to set the number of columns that we'll @@ -2096,7 +2135,7 @@ domultitable(struct texi *p, enum texicmd cmd, size_t */ type = texicmd(p, *pos, &end, NULL); advanceto(p, pos, end); - if (TEXICMD_COLUMNFRACTIONS != type) + if (TEXICMD_COLUMNFRACTIONS != type) texierr(p, "unknown multitable command"); while (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos]) { while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) @@ -2108,7 +2147,7 @@ domultitable(struct texi *p, enum texicmd cmd, size_t } columns++; } - } else + } else /* * We have arguments. * We could parse these, but it's easier to just let @@ -2165,7 +2204,7 @@ doend(struct texi *p, enum texicmd cmd, size_t *pos) while (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos]) advance(p, pos); - texiwarn(p, "unexpected \"end\": %.*s", + texiwarn(p, "unexpected \"end\": %.*s", (int)(*pos - start), &BUF(p)[start]); advanceeoln(p, pos, 1); } @@ -2279,9 +2318,9 @@ doignline(struct texi *p, enum texicmd cmd, size_t *po static char ** parsedirs(struct texi *p, const char *base, const char *cp, size_t *sz) { - char *tok, *str, *tofree; - const char *cpp; - size_t i = 0; + char *tok, *str, *tofree; + const char *cpp; + size_t i = 0; char **dirs; /* Count up our expected arguments. */ @@ -2294,15 +2333,15 @@ parsedirs(struct texi *p, const char *base, const char return(NULL); if (NULL == (dirs = calloc(*sz, sizeof(char *)))) texiabort(p, NULL); - if (NULL != base && NULL == (dirs[i++] = strdup(base))) + if (NULL != base && NULL == (dirs[i++] = strdup(base))) texiabort(p, NULL); if (NULL == cp) return(dirs); if (NULL == (tofree = tok = str = strdup(cp))) texiabort(p, NULL); - for ( ; NULL != (tok = strsep(&str, ":")); i++) - if (NULL == (dirs[i] = strdup(tok))) + for ( ; NULL != (tok = strsep(&str, ":")); i++) + if (NULL == (dirs[i] = strdup(tok))) texiabort(p, NULL); free(tofree); @@ -2313,9 +2352,12 @@ int main(int argc, char *argv[]) { struct texi texi; - int c; + char date[32]; + struct stat st; char *dirpath, *dir, *ccp; const char *progname, *Idir, *cp; + time_t t; + int c; progname = strrchr(argv[0], '/'); if (progname == NULL) @@ -2329,11 +2371,14 @@ main(int argc, char *argv[]) texi.seenvs = -1; Idir = NULL; - while (-1 != (c = getopt(argc, argv, "C:I:"))) + while (-1 != (c = getopt(argc, argv, "C:d:I:"))) switch (c) { case ('C'): texi.chapters = optarg; break; + case ('d'): + texi.date = optarg; + break; case ('I'): Idir = optarg; break; @@ -2355,7 +2400,7 @@ main(int argc, char *argv[]) texiabort(&texi, NULL); if (NULL == (dir = dirname(dirpath))) texiabort(&texi, NULL); - if (NULL != (cp = strrchr(argv[0], '/'))) + if (NULL != (cp = strrchr(argv[0], '/'))) texi.title = strdup(cp + 1); else texi.title = strdup(argv[0]); @@ -2365,16 +2410,29 @@ main(int argc, char *argv[]) *ccp = '\0'; texi.dirs = parsedirs(&texi, dir, Idir, &texi.dirsz); free(dirpath); + if (NULL == texi.date) { + t = stat(argv[0], &st) == 0 ? st.st_mtime : time(NULL); + strftime(date, sizeof(date), + "%B %e, %Y", localtime(&t)); + texi.date = date; + } parsefile(&texi, argv[0], 1); } else { texi.title = strdup("Unknown Manual"); texi.dirs = parsedirs(&texi, NULL, Idir, &texi.dirsz); + if (NULL == texi.date) { + t = time(NULL); + strftime(date, sizeof(date), + "%B %e, %Y", localtime(&t)); + texi.date = date; + } parsestdin(&texi); } texiexit(&texi); exit(EXIT_SUCCESS); usage: - fprintf(stderr, "usage: %s [-Cdir] [-Idirs] [file]\n", progname); + fprintf(stderr, "usage: %s [-C dir] [-d date] [-I dirs] [file]\n", + progname); return(EXIT_FAILURE); }