=================================================================== RCS file: /cvs/mandoc/mandocdb.c,v retrieving revision 1.120 retrieving revision 1.128 diff -u -p -r1.120 -r1.128 --- mandoc/mandocdb.c 2014/03/23 12:11:18 1.120 +++ mandoc/mandocdb.c 2014/04/04 02:31:07 1.128 @@ -1,4 +1,4 @@ -/* $Id: mandocdb.c,v 1.120 2014/03/23 12:11:18 schwarze Exp $ */ +/* $Id: mandocdb.c,v 1.128 2014/04/04 02:31:07 schwarze Exp $ */ /* * Copyright (c) 2011, 2012 Kristaps Dzonsons * Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze @@ -119,6 +119,7 @@ struct mlink { char *fsec; /* section from file name suffix */ struct mlink *next; /* singly linked list */ struct mpage *mpage; /* parent */ + int gzip; /* filename has a .gz suffix */ }; enum stmt { @@ -151,7 +152,7 @@ static void mlink_free(struct mlink *); static void mlinks_undupe(struct mpage *); static void mpages_free(void); static void mpages_merge(struct mchars *, struct mparse *); -static void parse_cat(struct mpage *); +static void parse_cat(struct mpage *, int); static void parse_man(struct mpage *, const struct man_node *); static void parse_mdoc(struct mpage *, const struct mdoc_node *); static int parse_mdoc_body(struct mpage *, const struct mdoc_node *); @@ -178,7 +179,7 @@ static char *progname; static int nodb; /* no database changes */ static int mparse_options; /* abort the parse early */ static int use_all; /* use all found files */ -static int verb; /* print what we're doing */ +static int debug; /* print what we're doing */ static int warnings; /* warn about crap */ static int write_utf8; /* write UTF-8 output; else ASCII */ static int exitcode; /* to be returned by main */ @@ -356,7 +357,7 @@ main(int argc, char *argv[]) path_arg = NULL; op = OP_DEFAULT; - while (-1 != (ch = getopt(argc, argv, "aC:d:nQT:tu:vW"))) + while (-1 != (ch = getopt(argc, argv, "aC:Dd:npQT:tu:v"))) switch (ch) { case ('a'): use_all = 1; @@ -366,6 +367,9 @@ main(int argc, char *argv[]) path_arg = optarg; op = OP_CONFFILE; break; + case ('D'): + debug++; + break; case ('d'): CHECKOP(op, ch); path_arg = optarg; @@ -374,6 +378,9 @@ main(int argc, char *argv[]) case ('n'): nodb = 1; break; + case ('p'): + warnings = 1; + break; case ('Q'): mparse_options |= MPARSE_QUICK; break; @@ -397,11 +404,8 @@ main(int argc, char *argv[]) op = OP_DELETE; break; case ('v'): - verb++; + /* Compatibility with espie@'s makewhatis. */ break; - case ('W'): - warnings = 1; - break; default: goto usage; } @@ -458,6 +462,11 @@ main(int argc, char *argv[]) } else manpath_parse(&dirs, path_arg, NULL, NULL); + if (0 == dirs.sz) { + exitcode = (int)MANDOCLEVEL_BADARG; + say("", "Empty manpath"); + } + /* * First scan the tree rooted at a base directory, then * build a new database and finally move it into place. @@ -505,10 +514,10 @@ out: ohash_delete(&mlinks); return(exitcode); usage: - fprintf(stderr, "usage: %s [-anQvW] [-C file] [-Tutf8]\n" - " %s [-anQvW] [-Tutf8] dir ...\n" - " %s [-nQvW] [-Tutf8] -d dir [file ...]\n" - " %s [-nvW] -u dir [file ...]\n" + fprintf(stderr, "usage: %s [-aDnpQ] [-C file] [-Tutf8]\n" + " %s [-aDnpQ] [-Tutf8] dir ...\n" + " %s [-DnpQ] [-Tutf8] -d dir [file ...]\n" + " %s [-Dnp] -u dir [file ...]\n" " %s [-Q] -t file ...\n", progname, progname, progname, progname, progname); @@ -536,7 +545,7 @@ treescan(void) FTS *f; FTSENT *ff; struct mlink *mlink; - int dform; + int dform, gzip; char *dsec, *arch, *fsec, *cp; const char *path; const char *argv[2]; @@ -551,7 +560,7 @@ treescan(void) f = fts_open((char * const *)argv, FTS_LOGICAL, NULL); if (NULL == f) { exitcode = (int)MANDOCLEVEL_SYSERR; - say("", NULL); + say("", "&fts_open"); return(0); } @@ -571,8 +580,18 @@ treescan(void) if (warnings) say(path, "Extraneous file"); continue; - } else if (NULL == (fsec = - strrchr(ff->fts_name, '.'))) { + } + gzip = 0; + fsec = NULL; + while (NULL == fsec) { + fsec = strrchr(ff->fts_name, '.'); + if (NULL == fsec || strcmp(fsec+1, "gz")) + break; + gzip = 1; + *fsec = '\0'; + fsec = NULL; + } + if (NULL == fsec) { if ( ! use_all) { if (warnings) say(path, @@ -583,10 +602,6 @@ treescan(void) if (warnings) say(path, "Skip html"); continue; - } else if (0 == strcmp(fsec, "gz")) { - if (warnings) - say(path, "Skip gz"); - continue; } else if (0 == strcmp(fsec, "ps")) { if (warnings) say(path, "Skip ps"); @@ -611,6 +626,7 @@ treescan(void) mlink->arch = arch; mlink->name = ff->fts_name; mlink->fsec = fsec; + mlink->gzip = gzip; mlink_add(mlink, ff->fts_statp); continue; } else if (FTS_D != ff->fts_info && @@ -705,7 +721,7 @@ filescan(const char *file) if (NULL == realpath(file, buf)) { exitcode = (int)MANDOCLEVEL_BADARG; - say(file, NULL); + say(file, "&realpath"); return; } @@ -721,7 +737,7 @@ filescan(const char *file) if (-1 == stat(buf, &st)) { exitcode = (int)MANDOCLEVEL_BADARG; - say(file, NULL); + say(file, "&stat"); return; } else if ( ! (S_IFREG & st.st_mode)) { exitcode = (int)MANDOCLEVEL_BADARG; @@ -964,13 +980,15 @@ mpages_merge(struct mchars *mc, struct mparse *mp) { char any[] = "any"; struct ohash_info str_info; + int fd[2]; struct mpage *mpage, *mpage_dest; struct mlink *mlink, *mlink_dest; struct mdoc *mdoc; struct man *man; char *sodest; char *cp; - int match; + pid_t child_pid; + int match, status; unsigned int pslot; enum mandoclevel lvl; @@ -994,7 +1012,42 @@ mpages_merge(struct mchars *mc, struct mparse *mp) mparse_reset(mp); mdoc = NULL; man = NULL; + sodest = NULL; + child_pid = 0; + fd[0] = -1; + fd[1] = -1; + if (mpage->mlinks->gzip) { + if (-1 == pipe(fd)) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say(mpage->mlinks->file, "&pipe gunzip"); + goto nextpage; + } + switch (child_pid = fork()) { + case (-1): + exitcode = (int)MANDOCLEVEL_SYSERR; + say(mpage->mlinks->file, "&fork gunzip"); + child_pid = 0; + close(fd[1]); + close(fd[0]); + goto nextpage; + case (0): + close(fd[0]); + if (-1 == dup2(fd[1], STDOUT_FILENO)) { + say(mpage->mlinks->file, + "&dup gunzip"); + exit(1); + } + execlp("gunzip", "gunzip", "-c", + mpage->mlinks->file, NULL); + say(mpage->mlinks->file, "&exec gunzip"); + exit(1); + default: + close(fd[1]); + break; + } + } + /* * Try interpreting the file as mdoc(7) or man(7) * source code, unless it is already known to be @@ -1002,7 +1055,7 @@ mpages_merge(struct mchars *mc, struct mparse *mp) */ if (FORM_CAT != mpage->mlinks->dform || FORM_CAT != mpage->mlinks->fform) { - lvl = mparse_readfd(mp, -1, mpage->mlinks->file); + lvl = mparse_readfd(mp, fd[0], mpage->mlinks->file); if (lvl < MANDOCLEVEL_FATAL) mparse_result(mp, &mdoc, &man, &sodest); } @@ -1042,9 +1095,7 @@ mpages_merge(struct mchars *mc, struct mparse *mp) mlink_dest->next = mpage->mlinks; mpage->mlinks = NULL; } - ohash_delete(&strings); - mpage = ohash_next(&mpages, &pslot); - continue; + goto nextpage; } else if (NULL != mdoc) { mpage->form = FORM_SRC; mpage->sec = @@ -1104,9 +1155,27 @@ mpages_merge(struct mchars *mc, struct mparse *mp) } else if (NULL != man) parse_man(mpage, man_node(man)); else - parse_cat(mpage); + parse_cat(mpage, fd[0]); dbadd(mpage, mc); + +nextpage: + if (child_pid) { + if (-1 == waitpid(child_pid, &status, 0)) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say(mpage->mlinks->file, "&wait gunzip"); + } else if (WIFSIGNALED(status)) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say(mpage->mlinks->file, + "gunzip died from signal %d", + WTERMSIG(status)); + } else if (WEXITSTATUS(status)) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say(mpage->mlinks->file, + "gunzip failed with code %d", + WEXITSTATUS(status)); + } + } ohash_delete(&strings); mpage = ohash_next(&mpages, &pslot); } @@ -1116,15 +1185,18 @@ mpages_merge(struct mchars *mc, struct mparse *mp) } static void -parse_cat(struct mpage *mpage) +parse_cat(struct mpage *mpage, int fd) { FILE *stream; char *line, *p, *title; size_t len, plen, titlesz; - if (NULL == (stream = fopen(mpage->mlinks->file, "r"))) { + stream = (-1 == fd) ? + fopen(mpage->mlinks->file, "r") : + fdopen(fd, "r"); + if (NULL == stream) { if (warnings) - say(mpage->mlinks->file, NULL); + say(mpage->mlinks->file, "&fopen"); return; } @@ -1263,9 +1335,9 @@ static void parse_man(struct mpage *mpage, const struct man_node *n) { const struct man_node *head, *body; - char *start, *sv, *title; + char *start, *title; char byte; - size_t sz, titlesz; + size_t sz; if (NULL == n) return; @@ -1285,12 +1357,8 @@ parse_man(struct mpage *mpage, const struct man_node * NULL != (head = (head->child)) && MAN_TEXT == head->type && 0 == strcmp(head->string, "NAME") && - NULL != (body = body->child) && - MAN_TEXT == body->type) { + NULL != body->child) { - title = NULL; - titlesz = 0; - /* * Suck the entire NAME section into memory. * Yes, we might run away. @@ -1298,47 +1366,11 @@ parse_man(struct mpage *mpage, const struct man_node * * NAME sections over many lines. */ - for ( ; NULL != body; body = body->next) { - if (MAN_TEXT != body->type) - break; - if (0 == (sz = strlen(body->string))) - continue; - title = mandoc_realloc - (title, titlesz + sz + 1); - memcpy(title + titlesz, body->string, sz); - titlesz += sz + 1; - title[titlesz - 1] = ' '; - } + title = NULL; + man_deroff(&title, body); if (NULL == title) return; - title = mandoc_realloc(title, titlesz + 1); - title[titlesz] = '\0'; - - /* Skip leading space. */ - - sv = title; - while (isspace((unsigned char)*sv)) - sv++; - - if (0 == (sz = strlen(sv))) { - free(title); - return; - } - - /* Erase trailing space. */ - - start = &sv[sz - 1]; - while (start > sv && isspace((unsigned char)*start)) - *start-- = '\0'; - - if (start == sv) { - free(title); - return; - } - - start = sv; - /* * Go through a special heuristic dance here. * Conventionally, one or more manual names are @@ -1347,6 +1379,7 @@ parse_man(struct mpage *mpage, const struct man_node * * the name parts here. */ + start = title; for ( ;; ) { sz = strcspn(start, " ,"); if ('\0' == start[sz]) @@ -1377,7 +1410,7 @@ parse_man(struct mpage *mpage, const struct man_node * start++; } - if (sv == start) { + if (start == title) { putkey(mpage, start, TYPE_Nm); free(title); return; @@ -1545,32 +1578,10 @@ parse_mdoc_Xr(struct mpage *mpage, const struct mdoc_n static int parse_mdoc_Nd(struct mpage *mpage, const struct mdoc_node *n) { - size_t sz; - if (MDOC_BODY != n->type) - return(0); - - /* - * Special-case the `Nd' because we need to put the description - * into the document table. - */ - - for (n = n->child; NULL != n; n = n->next) { - if (MDOC_TEXT == n->type) { - if (NULL != mpage->desc) { - sz = strlen(mpage->desc) + - strlen(n->string) + 2; - mpage->desc = mandoc_realloc( - mpage->desc, sz); - strlcat(mpage->desc, " ", sz); - strlcat(mpage->desc, n->string, sz); - } else - mpage->desc = mandoc_strdup(n->string); - } - if (NULL != n->child) - parse_mdoc_Nd(mpage, n); - } - return(1); + if (MDOC_BODY == n->type) + mdoc_deroff(&mpage->desc, n); + return(0); } static int @@ -1620,7 +1631,7 @@ putkeys(const struct mpage *mpage, if (0 == sz) return; - if (verb > 1) { + if (debug > 1) { for (i = 0, mask = 1; i < mansearch_keymax; i++, mask <<= 1) @@ -1847,20 +1858,54 @@ dbadd(struct mpage *mpage, struct mchars *mc) size_t i; unsigned int slot; - if (verb) - say(mpage->mlinks->file, "Adding to database"); + mlink = mpage->mlinks; - if (nodb) + if (nodb) { + while (NULL != mlink) { + fputs(mlink->name, stdout); + if (NULL == mlink->next || + strcmp(mlink->dsec, mlink->next->dsec) || + strcmp(mlink->fsec, mlink->next->fsec) || + strcmp(mlink->arch, mlink->next->arch)) { + putchar('('); + if ('\0' == *mlink->dsec) + fputs(mlink->fsec, stdout); + else + fputs(mlink->dsec, stdout); + if ('\0' != *mlink->arch) + printf("/%s", mlink->arch); + putchar(')'); + } + mlink = mlink->next; + if (NULL != mlink) + fputs(", ", stdout); + } + for (key = ohash_first(&strings, &slot); NULL != key; + key = ohash_next(&strings, &slot)) { + if (TYPE_Nd & key->mask) { + if (NULL == key->rendered) + render_key(mc, key); + printf(" - %s", key->rendered); + break; + } + } + putchar('\n'); return; + } + if (debug) + say(mlink->file, "Adding to database"); + i = 1; SQL_BIND_INT(stmts[STMT_INSERT_PAGE], i, FORM_SRC == mpage->form); SQL_STEP(stmts[STMT_INSERT_PAGE]); mpage->recno = sqlite3_last_insert_rowid(db); sqlite3_reset(stmts[STMT_INSERT_PAGE]); - for (mlink = mpage->mlinks; mlink; mlink = mlink->next) + while (NULL != mlink) { dbadd_mlink(mlink); + mlink = mlink->next; + } for (key = ohash_first(&strings, &slot); NULL != key; key = ohash_next(&strings, &slot)) { @@ -1893,7 +1938,7 @@ dbprune(void) for (mpage = ohash_first(&mpages, &slot); NULL != mpage; mpage = ohash_next(&mpages, &slot)) { mlink = mpage->mlinks; - if (verb) + if (debug) say(mlink->file, "Deleting from database"); if (nodb) continue; @@ -1942,7 +1987,7 @@ dbclose(int real) if ('\0' == *tempfilename) { if (-1 == rename(MANDOC_DB "~", MANDOC_DB)) { exitcode = (int)MANDOCLEVEL_SYSERR; - say(MANDOC_DB, "%s", strerror(errno)); + say(MANDOC_DB, "&rename"); } return; } @@ -1950,22 +1995,22 @@ dbclose(int real) switch (child = fork()) { case (-1): exitcode = (int)MANDOCLEVEL_SYSERR; - say("fork cmp", "%s", strerror(errno)); + say("", "&fork cmp"); return; case (0): execlp("cmp", "cmp", "-s", tempfilename, MANDOC_DB, NULL); - say("exec cmp", "%s", strerror(errno)); + say("", "&exec cmp"); exit(0); default: break; } if (-1 == waitpid(child, &status, 0)) { exitcode = (int)MANDOCLEVEL_SYSERR; - say("wait cmp", "%s", strerror(errno)); + say("", "&wait cmp"); } else if (WIFSIGNALED(status)) { exitcode = (int)MANDOCLEVEL_SYSERR; - say("cmp", "Died from a signal"); + say("", "cmp died from signal %d", WTERMSIG(status)); } else if (WEXITSTATUS(status)) { exitcode = (int)MANDOCLEVEL_SYSERR; say(MANDOC_DB, @@ -1976,22 +2021,22 @@ dbclose(int real) switch (child = fork()) { case (-1): exitcode = (int)MANDOCLEVEL_SYSERR; - say("fork rm", "%s", strerror(errno)); + say("", "&fork rm"); return; case (0): execlp("rm", "rm", "-rf", tempfilename, NULL); - say("exec rm", "%s", strerror(errno)); + say("", "&exec rm"); exit((int)MANDOCLEVEL_SYSERR); default: break; } if (-1 == waitpid(child, &status, 0)) { exitcode = (int)MANDOCLEVEL_SYSERR; - say("wait rm", "%s", strerror(errno)); + say("", "&wait rm"); } else if (WIFSIGNALED(status) || WEXITSTATUS(status)) { exitcode = (int)MANDOCLEVEL_SYSERR; - say(tempfilename, - "Cannot remove temporary directory"); + say("", "%s: Cannot remove temporary directory", + tempfilename); } } @@ -2040,24 +2085,25 @@ dbopen(int real) if (strlcpy(tempfilename, "/tmp/mandocdb.XXXXXX", sizeof(tempfilename)) >= sizeof(tempfilename)) { exitcode = (int)MANDOCLEVEL_SYSERR; - say("/tmp/mandocdb.XXXXXX", "Filename too long"); + say("", "/tmp/mandocdb.XXXXXX: Filename too long"); return(0); } if (NULL == mkdtemp(tempfilename)) { exitcode = (int)MANDOCLEVEL_SYSERR; - say(tempfilename, "%s", strerror(errno)); + say("", "&%s", tempfilename); return(0); } if (strlcat(tempfilename, "/" MANDOC_DB, sizeof(tempfilename)) >= sizeof(tempfilename)) { exitcode = (int)MANDOCLEVEL_SYSERR; - say(tempfilename, "Filename too long"); + say("", "%s/" MANDOC_DB ": Filename too long", + tempfilename); return(0); } rc = sqlite3_open_v2(tempfilename, &db, ofl, NULL); if (SQLITE_OK != rc) { exitcode = (int)MANDOCLEVEL_SYSERR; - say(tempfilename, "%s", sqlite3_errmsg(db)); + say("", "%s: %s", tempfilename, sqlite3_errmsg(db)); return(0); } @@ -2154,12 +2200,12 @@ set_basedir(const char *targetdir) if (NULL == getcwd(startdir, PATH_MAX)) { exitcode = (int)MANDOCLEVEL_SYSERR; if (NULL != targetdir) - say(".", NULL); + say("", "&getcwd"); return(0); } if (-1 == (fd = open(startdir, O_RDONLY, 0))) { exitcode = (int)MANDOCLEVEL_SYSERR; - say(startdir, NULL); + say("", "&open %s", startdir); return(0); } if (NULL == targetdir) @@ -2171,7 +2217,7 @@ set_basedir(const char *targetdir) close(fd); basedir[0] = '\0'; exitcode = (int)MANDOCLEVEL_SYSERR; - say(startdir, NULL); + say("", "&chdir %s", startdir); return(0); } if (NULL == targetdir) { @@ -2182,11 +2228,11 @@ set_basedir(const char *targetdir) if (NULL == realpath(targetdir, basedir)) { basedir[0] = '\0'; exitcode = (int)MANDOCLEVEL_BADARG; - say(targetdir, NULL); + say("", "&%s: realpath", targetdir); return(0); } else if (-1 == chdir(basedir)) { exitcode = (int)MANDOCLEVEL_BADARG; - say("", NULL); + say("", "&chdir"); return(0); } return(1); @@ -2196,6 +2242,7 @@ static void say(const char *file, const char *format, ...) { va_list ap; + int use_errno; if ('\0' != *basedir) fprintf(stderr, "%s", basedir); @@ -2203,16 +2250,32 @@ say(const char *file, const char *format, ...) fputs("//", stderr); if ('\0' != *file) fprintf(stderr, "%s", file); - fputs(": ", stderr); - if (NULL == format) { - perror(NULL); - return; + use_errno = 1; + if (NULL != format) { + switch (*format) { + case ('&'): + format++; + break; + case ('\0'): + format = NULL; + break; + default: + use_errno = 0; + break; + } } - - va_start(ap, format); - vfprintf(stderr, format, ap); - va_end(ap); - - fputc('\n', stderr); + if (NULL != format) { + if ('\0' != *basedir || '\0' != *file) + fputs(": ", stderr); + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); + } + if (use_errno) { + if ('\0' != *basedir || '\0' != *file || NULL != format) + fputs(": ", stderr); + perror(NULL); + } else + fputc('\n', stderr); }