=================================================================== RCS file: /cvs/mandoc/mandocdb.c,v retrieving revision 1.93 retrieving revision 1.120 diff -u -p -r1.93 -r1.120 --- mandoc/mandocdb.c 2014/01/02 18:52:15 1.93 +++ mandoc/mandocdb.c 2014/03/23 12:11:18 1.120 @@ -1,7 +1,7 @@ -/* $Id: mandocdb.c,v 1.93 2014/01/02 18:52:15 schwarze Exp $ */ +/* $Id: mandocdb.c,v 1.120 2014/03/23 12:11:18 schwarze Exp $ */ /* * Copyright (c) 2011, 2012 Kristaps Dzonsons - * Copyright (c) 2011, 2012, 2013 Ingo Schwarze + * Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -20,6 +20,7 @@ #endif #include +#include #include #include @@ -45,9 +46,13 @@ #include "mdoc.h" #include "man.h" #include "mandoc.h" +#include "mandoc_aux.h" #include "manpath.h" #include "mansearch.h" +extern int mansearch_keymax; +extern const char *const mansearch_keynames[]; + #define SQL_EXEC(_v) \ if (SQLITE_OK != sqlite3_exec(db, (_v), NULL, NULL, NULL)) \ fprintf(stderr, "%s\n", sqlite3_errmsg(db)) @@ -82,10 +87,10 @@ enum form { }; struct str { - char *utf8; /* key in UTF-8 form */ + char *rendered; /* key in UTF-8 or ASCII form */ const struct mpage *mpage; /* if set, the owning parse */ uint64_t mask; /* bitmask in sequence */ - char key[]; /* the string itself */ + char key[]; /* may contain escape sequences */ }; struct inodev { @@ -95,6 +100,7 @@ struct inodev { struct mpage { struct inodev inodev; /* used for hashing routine */ + int64_t recno; /* id in mpages SQL table */ enum form form; /* format from file content */ char *sec; /* section from file content */ char *arch; /* architecture from file content */ @@ -112,13 +118,9 @@ struct mlink { char *name; /* name from file name (not empty) */ char *fsec; /* section from file name suffix */ struct mlink *next; /* singly linked list */ + struct mpage *mpage; /* parent */ }; -struct title { - char *title; /* name(sec/arch) given inside the file */ - char *file; /* file name in case of mismatch */ -}; - enum stmt { STMT_DELETE_PAGE = 0, /* delete mpage */ STMT_INSERT_PAGE, /* insert mpage */ @@ -135,7 +137,8 @@ struct mdoc_handler { }; static void dbclose(int); -static void dbindex(const struct mpage *, struct mchars *); +static void dbadd(struct mpage *, struct mchars *); +static void dbadd_mlink(const struct mlink *mlink); static int dbopen(int); static void dbprune(void); static void filescan(const char *); @@ -147,7 +150,7 @@ static int mlink_check(struct mpage *, struct mlink * static void mlink_free(struct mlink *); static void mlinks_undupe(struct mpage *); static void mpages_free(void); -static void mpages_merge(struct mchars *, struct mparse *, int); +static void mpages_merge(struct mchars *, struct mparse *); static void parse_cat(struct mpage *); static void parse_man(struct mpage *, const struct man_node *); static void parse_mdoc(struct mpage *, const struct mdoc_node *); @@ -159,23 +162,25 @@ static int parse_mdoc_Nd(struct mpage *, const struct static int parse_mdoc_Nm(struct mpage *, const struct mdoc_node *); static int parse_mdoc_Sh(struct mpage *, const struct mdoc_node *); static int parse_mdoc_Xr(struct mpage *, const struct mdoc_node *); -static void putkey(const struct mpage *, - const char *, uint64_t); +static void putkey(const struct mpage *, char *, uint64_t); static void putkeys(const struct mpage *, const char *, size_t, uint64_t); static void putmdockey(const struct mpage *, const struct mdoc_node *, uint64_t); +static void render_key(struct mchars *, struct str *); static void say(const char *, const char *, ...); static int set_basedir(const char *); static int treescan(void); static size_t utf8(unsigned int, char [7]); -static void utf8key(struct mchars *, struct str *); +static char tempfilename[32]; static char *progname; -static int use_all; /* use all found files */ static int nodb; /* no database changes */ +static int mparse_options; /* abort the parse early */ +static int use_all; /* use all found files */ static int verb; /* print what we're doing */ static int warnings; /* warn about crap */ +static int write_utf8; /* write UTF-8 output; else ASCII */ static int exitcode; /* to be returned by main */ static enum op op; /* operational mode */ static char basedir[PATH_MAX]; /* current base directory */ @@ -351,7 +356,7 @@ main(int argc, char *argv[]) path_arg = NULL; op = OP_DEFAULT; - while (-1 != (ch = getopt(argc, argv, "aC:d:ntu:vW"))) + while (-1 != (ch = getopt(argc, argv, "aC:d:nQT:tu:vW"))) switch (ch) { case ('a'): use_all = 1; @@ -369,6 +374,17 @@ main(int argc, char *argv[]) case ('n'): nodb = 1; break; + case ('Q'): + mparse_options |= MPARSE_QUICK; + break; + case ('T'): + if (strcmp(optarg, "utf8")) { + fprintf(stderr, "-T%s: Unsupported " + "output format\n", optarg); + goto usage; + } + write_utf8 = 1; + break; case ('t'): CHECKOP(op, ch); dup2(STDOUT_FILENO, STDERR_FILENO); @@ -399,8 +415,7 @@ main(int argc, char *argv[]) } exitcode = (int)MANDOCLEVEL_OK; - mp = mparse_alloc(MPARSE_AUTO, - MANDOCLEVEL_FATAL, NULL, NULL, NULL); + mp = mparse_alloc(mparse_options, MANDOCLEVEL_FATAL, NULL, NULL); mc = mchars_alloc(); ohash_init(&mpages, 6, &mpages_info); @@ -426,7 +441,7 @@ main(int argc, char *argv[]) if (OP_TEST != op) dbprune(); if (OP_DELETE != op) - mpages_merge(mc, mp, 0); + mpages_merge(mc, mp); dbclose(1); } else { /* @@ -470,7 +485,7 @@ main(int argc, char *argv[]) if (0 == dbopen(0)) goto out; - mpages_merge(mc, mp, warnings && !use_all); + mpages_merge(mc, mp); dbclose(0); if (j + 1 < dirs.sz) { @@ -490,11 +505,11 @@ out: ohash_delete(&mlinks); return(exitcode); usage: - fprintf(stderr, "usage: %s [-anvW] [-C file]\n" - " %s [-anvW] dir ...\n" - " %s [-nvW] -d dir [file ...]\n" + fprintf(stderr, "usage: %s [-anQvW] [-C file] [-Tutf8]\n" + " %s [-anQvW] [-Tutf8] dir ...\n" + " %s [-nQvW] [-Tutf8] -d dir [file ...]\n" " %s [-nvW] -u dir [file ...]\n" - " %s -t file ...\n", + " %s [-Q] -t file ...\n", progname, progname, progname, progname, progname); @@ -522,8 +537,8 @@ treescan(void) FTSENT *ff; struct mlink *mlink; int dform; - char *fsec; - const char *dsec, *arch, *cp, *path; + char *dsec, *arch, *fsec, *cp; + const char *path; const char *argv[2]; argv[0] = "."; @@ -588,16 +603,14 @@ treescan(void) continue; } else fsec[-1] = '\0'; + mlink = mandoc_calloc(1, sizeof(struct mlink)); strlcpy(mlink->file, path, sizeof(mlink->file)); mlink->dform = dform; - if (NULL != dsec) - mlink->dsec = mandoc_strdup(dsec); - if (NULL != arch) - mlink->arch = mandoc_strdup(arch); - mlink->name = mandoc_strdup(ff->fts_name); - if (NULL != fsec) - mlink->fsec = mandoc_strdup(fsec); + mlink->dsec = dsec; + mlink->arch = arch; + mlink->name = ff->fts_name; + mlink->fsec = fsec; mlink_add(mlink, ff->fts_statp); continue; } else if (FTS_D != ff->fts_info && @@ -617,8 +630,6 @@ treescan(void) * Try to infer this from the name. * If we're not in use_all, enforce it. */ - dsec = NULL; - dform = FORM_NONE; cp = ff->fts_name; if (FTS_DP == ff->fts_info) break; @@ -629,6 +640,9 @@ treescan(void) } else if (0 == strncmp(cp, "cat", 3)) { dform = FORM_CAT; dsec = cp + 3; + } else { + dform = FORM_NONE; + dsec = NULL; } if (NULL != dsec || use_all) @@ -643,9 +657,10 @@ treescan(void) * Possibly our architecture. * If we're descending, keep tabs on it. */ - arch = NULL; if (FTS_DP != ff->fts_info && NULL != dsec) arch = ff->fts_name; + else + arch = NULL; break; default: if (FTS_DP == ff->fts_info || use_all) @@ -692,11 +707,19 @@ filescan(const char *file) exitcode = (int)MANDOCLEVEL_BADARG; say(file, NULL); return; - } else if (OP_TEST != op && strstr(buf, basedir) != buf) { + } + + if (strstr(buf, basedir) == buf) + start = buf + strlen(basedir) + 1; + else if (OP_TEST == op) + start = buf; + else { exitcode = (int)MANDOCLEVEL_BADARG; say("", "%s: outside base directory", buf); return; - } else if (-1 == stat(buf, &st)) { + } + + if (-1 == stat(buf, &st)) { exitcode = (int)MANDOCLEVEL_BADARG; say(file, NULL); return; @@ -705,7 +728,7 @@ filescan(const char *file) say(file, "Not a regular file"); return; } - start = buf + strlen(basedir); + mlink = mandoc_calloc(1, sizeof(struct mlink)); strlcpy(mlink->file, start, sizeof(mlink->file)); @@ -719,16 +742,16 @@ filescan(const char *file) *p++ = '\0'; if (0 == strncmp(start, "man", 3)) { mlink->dform = FORM_SRC; - mlink->dsec = mandoc_strdup(start + 3); + mlink->dsec = start + 3; } else if (0 == strncmp(start, "cat", 3)) { mlink->dform = FORM_CAT; - mlink->dsec = mandoc_strdup(start + 3); + mlink->dsec = start + 3; } start = p; if (NULL != mlink->dsec && NULL != (p = strchr(start, '/'))) { *p++ = '\0'; - mlink->arch = mandoc_strdup(start); + mlink->arch = start; start = p; } } @@ -743,7 +766,7 @@ filescan(const char *file) if ('.' == *p) { *p++ = '\0'; - mlink->fsec = mandoc_strdup(p); + mlink->fsec = p; } /* @@ -755,8 +778,6 @@ filescan(const char *file) mlink->name = p + 1; *p = '\0'; } - mlink->name = mandoc_strdup(mlink->name); - mlink_add(mlink, &st); } @@ -769,14 +790,10 @@ mlink_add(struct mlink *mlink, const struct stat *st) assert(NULL != mlink->file); - if (NULL == mlink->dsec) - mlink->dsec = mandoc_strdup(""); - if (NULL == mlink->arch) - mlink->arch = mandoc_strdup(""); - if (NULL == mlink->name) - mlink->name = mandoc_strdup(""); - if (NULL == mlink->fsec) - mlink->fsec = mandoc_strdup(""); + mlink->dsec = mandoc_strdup(mlink->dsec ? mlink->dsec : ""); + mlink->arch = mandoc_strdup(mlink->arch ? mlink->arch : ""); + mlink->name = mandoc_strdup(mlink->name ? mlink->name : ""); + mlink->fsec = mandoc_strdup(mlink->fsec ? mlink->fsec : ""); if ('0' == *mlink->fsec) { free(mlink->fsec); @@ -804,6 +821,7 @@ mlink_add(struct mlink *mlink, const struct stat *st) } else mlink->next = mpage->mlinks; mpage->mlinks = mlink; + mlink->mpage = mpage; } static void @@ -942,19 +960,18 @@ mlink_check(struct mpage *mpage, struct mlink *mlink) * and filename to determine whether the file is parsable or not. */ static void -mpages_merge(struct mchars *mc, struct mparse *mp, int check_reachable) +mpages_merge(struct mchars *mc, struct mparse *mp) { - struct ohash title_table; - struct ohash_info title_info, str_info; - struct mpage *mpage; - struct mlink *mlink; + char any[] = "any"; + struct ohash_info str_info; + struct mpage *mpage, *mpage_dest; + struct mlink *mlink, *mlink_dest; struct mdoc *mdoc; struct man *man; - struct title *title_entry; - char *title_str; - const char *cp; + char *sodest; + char *cp; int match; - unsigned int pslot, tslot; + unsigned int pslot; enum mandoclevel lvl; str_info.alloc = hash_alloc; @@ -962,13 +979,8 @@ mpages_merge(struct mchars *mc, struct mparse *mp, int str_info.hfree = hash_free; str_info.key_offset = offsetof(struct str, key); - if (check_reachable) { - title_info.alloc = hash_alloc; - title_info.halloc = hash_halloc; - title_info.hfree = hash_free; - title_info.key_offset = offsetof(struct title, title); - ohash_init(&title_table, 6, &title_info); - } + if (0 == nodb) + SQL_EXEC("BEGIN TRANSACTION"); mpage = ohash_first(&mpages, &pslot); while (NULL != mpage) { @@ -992,10 +1004,48 @@ mpages_merge(struct mchars *mc, struct mparse *mp, int FORM_CAT != mpage->mlinks->fform) { lvl = mparse_readfd(mp, -1, mpage->mlinks->file); if (lvl < MANDOCLEVEL_FATAL) - mparse_result(mp, &mdoc, &man); + mparse_result(mp, &mdoc, &man, &sodest); } - if (NULL != mdoc) { + if (NULL != sodest) { + mlink_dest = ohash_find(&mlinks, + ohash_qlookup(&mlinks, sodest)); + if (NULL != mlink_dest) { + + /* The .so target exists. */ + + mpage_dest = mlink_dest->mpage; + mlink = mpage->mlinks; + while (1) { + mlink->mpage = mpage_dest; + + /* + * If the target was already + * processed, add the links + * to the database now. + * Otherwise, this will + * happen when we come + * to the target. + */ + + if (mpage_dest->recno) + dbadd_mlink(mlink); + + if (NULL == mlink->next) + break; + mlink = mlink->next; + } + + /* Move all links to the target. */ + + mlink->next = mlink_dest->next; + mlink_dest->next = mpage->mlinks; + mpage->mlinks = NULL; + } + ohash_delete(&strings); + mpage = ohash_next(&mpages, &pslot); + continue; + } else if (NULL != mdoc) { mpage->form = FORM_SRC; mpage->sec = mandoc_strdup(mdoc_meta(mdoc)->msec); @@ -1021,9 +1071,19 @@ mpages_merge(struct mchars *mc, struct mparse *mp, int mpage->title = mandoc_strdup(mpage->mlinks->name); } + putkey(mpage, mpage->sec, TYPE_sec); + putkey(mpage, '\0' == *mpage->arch ? + any : mpage->arch, TYPE_arch); - for (mlink = mpage->mlinks; mlink; mlink = mlink->next) + for (mlink = mpage->mlinks; mlink; mlink = mlink->next) { + if ('\0' != *mlink->dsec) + putkey(mpage, mlink->dsec, TYPE_sec); + if ('\0' != *mlink->fsec) + putkey(mpage, mlink->fsec, TYPE_sec); + putkey(mpage, '\0' == *mlink->arch ? + any : mlink->arch, TYPE_arch); putkey(mpage, mlink->name, TYPE_Nm); + } if (warnings && !use_all) { match = 0; @@ -1046,56 +1106,13 @@ mpages_merge(struct mchars *mc, struct mparse *mp, int else parse_cat(mpage); - /* - * Build a title string for the file. If it matches - * the location of the file, remember the title as - * found; else, remember it as missing. - */ - - if (check_reachable) { - if (-1 == asprintf(&title_str, "%s(%s%s%s)", - mpage->title, mpage->sec, - '\0' == *mpage->arch ? "" : "/", - mpage->arch)) { - perror(NULL); - exit((int)MANDOCLEVEL_SYSERR); - } - tslot = ohash_qlookup(&title_table, title_str); - title_entry = ohash_find(&title_table, tslot); - if (NULL == title_entry) { - title_entry = mandoc_malloc( - sizeof(struct title)); - title_entry->title = title_str; - title_entry->file = mandoc_strdup( - match ? "" : mpage->mlinks->file); - ohash_insert(&title_table, tslot, - title_entry); - } else { - if (match) - *title_entry->file = '\0'; - free(title_str); - } - } - - dbindex(mpage, mc); + dbadd(mpage, mc); ohash_delete(&strings); mpage = ohash_next(&mpages, &pslot); } - if (check_reachable) { - title_entry = ohash_first(&title_table, &tslot); - while (NULL != title_entry) { - if ('\0' != *title_entry->file) - say(title_entry->file, - "Probably unreachable, title is %s", - title_entry->title); - free(title_entry->title); - free(title_entry->file); - free(title_entry); - title_entry = ohash_next(&title_table, &tslot); - } - ohash_delete(&title_table); - } + if (0 == nodb) + SQL_EXEC("END TRANSACTION"); } static void @@ -1214,10 +1231,15 @@ parse_cat(struct mpage *mpage) * Put a type/word pair into the word database for this particular file. */ static void -putkey(const struct mpage *mpage, const char *value, uint64_t type) +putkey(const struct mpage *mpage, char *value, uint64_t type) { + char *cp; assert(NULL != value); + if (TYPE_arch == type) + for (cp = value; *cp; cp++) + if (isupper((unsigned char)*cp)) + *cp = _tolower((unsigned char)*cp); putkeys(mpage, value, strlen(value), type); } @@ -1333,6 +1355,15 @@ parse_man(struct mpage *mpage, const struct man_node * byte = start[sz]; start[sz] = '\0'; + /* + * Assume a stray trailing comma in the + * name list if a name begins with a dash. + */ + + if ('-' == start[0] || + ('\\' == start[0] && '-' == start[1])) + break; + putkey(mpage, start, TYPE_Nm); if (' ' == byte) { @@ -1462,7 +1493,7 @@ parse_mdoc_Fd(struct mpage *mpage, const struct mdoc_n static int parse_mdoc_Fn(struct mpage *mpage, const struct mdoc_node *n) { - const char *cp; + char *cp; if (NULL == (n = n->child) || MDOC_TEXT != n->type) return(0); @@ -1505,10 +1536,7 @@ parse_mdoc_Xr(struct mpage *mpage, const struct mdoc_n return(0); } - if (-1 == asprintf(&cp, "%s(%s)", n->string, n->next->string)) { - perror(NULL); - exit((int)MANDOCLEVEL_SYSERR); - } + mandoc_asprintf(&cp, "%s(%s)", n->string, n->next->string); putkey(mpage, cp, TYPE_Xr); free(cp); return(0); @@ -1584,12 +1612,24 @@ putkeys(const struct mpage *mpage, const char *cp, size_t sz, uint64_t v) { struct str *s; - unsigned int slot; const char *end; + uint64_t mask; + unsigned int slot; + int i; if (0 == sz) return; + if (verb > 1) { + for (i = 0, mask = 1; + i < mansearch_keymax; + i++, mask <<= 1) + if (mask & v) + break; + say(mpage->mlinks->file, "Adding key %s=%*s", + mansearch_keynames[i], sz, cp); + } + end = cp + sz; slot = ohash_qlookupi(&strings, cp, &end); s = ohash_find(&strings, slot); @@ -1659,26 +1699,27 @@ utf8(unsigned int cp, char out[7]) } /* - * Store the UTF-8 version of a key, or alias the pointer if the key has - * no UTF-8 transcription marks in it. + * Store the rendered version of a key, or alias the pointer + * if the key contains no escape sequences. */ static void -utf8key(struct mchars *mc, struct str *key) +render_key(struct mchars *mc, struct str *key) { size_t sz, bsz, pos; - char utfbuf[7], res[5]; + char utfbuf[7], res[6]; char *buf; const char *seq, *cpp, *val; int len, u; enum mandoc_esc esc; - assert(NULL == key->utf8); + assert(NULL == key->rendered); res[0] = '\\'; res[1] = '\t'; res[2] = ASCII_NBRSP; res[3] = ASCII_HYPH; - res[4] = '\0'; + res[4] = ASCII_BREAK; + res[5] = '\0'; val = key->key; bsz = strlen(val); @@ -1688,7 +1729,7 @@ utf8key(struct mchars *mc, struct str *key) * pointer as ourselvse and get out of here. */ if (strcspn(val, res) == bsz) { - key->utf8 = key->key; + key->rendered = key->key; return; } @@ -1709,16 +1750,24 @@ utf8key(struct mchars *mc, struct str *key) val += sz; } - if (ASCII_HYPH == *val) { + switch (*val) { + case (ASCII_HYPH): buf[pos++] = '-'; val++; continue; - } else if ('\t' == *val || ASCII_NBRSP == *val) { + case ('\t'): + /* FALLTHROUGH */ + case (ASCII_NBRSP): buf[pos++] = ' '; val++; + /* FALLTHROUGH */ + case (ASCII_BREAK): continue; - } else if ('\\' != *val) + default: break; + } + if ('\\' != *val) + break; /* Read past the slash. */ @@ -1728,116 +1777,106 @@ utf8key(struct mchars *mc, struct str *key) * Parse the escape sequence and see if it's a * predefined character or special character. */ + esc = mandoc_escape ((const char **)&val, &seq, &len); if (ESCAPE_ERROR == esc) break; - if (ESCAPE_SPECIAL != esc) continue; - if (0 == (u = mchars_spec2cp(mc, seq, len))) - continue; /* - * If we have a Unicode codepoint, try to convert that - * to a UTF-8 byte string. + * Render the special character + * as either UTF-8 or ASCII. */ - cpp = utfbuf; - if (0 == (sz = utf8(u, utfbuf))) - continue; + if (write_utf8) { + if (0 == (u = mchars_spec2cp(mc, seq, len))) + continue; + cpp = utfbuf; + if (0 == (sz = utf8(u, utfbuf))) + continue; + sz = strlen(cpp); + } else { + cpp = mchars_spec2str(mc, seq, len, &sz); + if (NULL == cpp) + continue; + if (ASCII_NBRSP == *cpp) { + cpp = " "; + sz = 1; + } + } + /* Copy the rendered glyph into the stream. */ - sz = strlen(cpp); bsz += sz; - buf = mandoc_realloc(buf, bsz); - memcpy(&buf[pos], cpp, sz); pos += sz; } buf[pos] = '\0'; - key->utf8 = buf; + key->rendered = buf; } +static void +dbadd_mlink(const struct mlink *mlink) +{ + size_t i; + + i = 1; + SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->dsec); + SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->arch); + SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->name); + SQL_BIND_INT64(stmts[STMT_INSERT_LINK], i, mlink->mpage->recno); + SQL_STEP(stmts[STMT_INSERT_LINK]); + sqlite3_reset(stmts[STMT_INSERT_LINK]); +} + /* * Flush the current page's terms (and their bits) into the database. * Wrap the entire set of additions in a transaction to make sqlite be a * little faster. - * Also, UTF-8-encode the description at the last possible moment. + * Also, handle escape sequences at the last possible moment. */ static void -dbindex(const struct mpage *mpage, struct mchars *mc) +dbadd(struct mpage *mpage, struct mchars *mc) { struct mlink *mlink; struct str *key; - const char *desc; - int64_t recno; size_t i; unsigned int slot; if (verb) - say(mpage->mlinks->file, "Adding to index"); + say(mpage->mlinks->file, "Adding to database"); if (nodb) return; - desc = ""; - if (NULL != mpage->desc && '\0' != *mpage->desc) { - key = ohash_find(&strings, - ohash_qlookup(&strings, mpage->desc)); - assert(NULL != key); - if (NULL == key->utf8) - utf8key(mc, key); - desc = key->utf8; - } - - SQL_EXEC("BEGIN TRANSACTION"); - i = 1; - /* - * XXX The following three lines are obsolete - * and only kept for backward compatibility - * until apropos(1) and friends have caught up. - */ - SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, mpage->mlinks->file); - SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, mpage->mlinks->dsec); - SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, mpage->mlinks->arch); - SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, desc); SQL_BIND_INT(stmts[STMT_INSERT_PAGE], i, FORM_SRC == mpage->form); SQL_STEP(stmts[STMT_INSERT_PAGE]); - recno = sqlite3_last_insert_rowid(db); + mpage->recno = sqlite3_last_insert_rowid(db); sqlite3_reset(stmts[STMT_INSERT_PAGE]); - for (mlink = mpage->mlinks; mlink; mlink = mlink->next) { - i = 1; - SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->file); - SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->dsec); - SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->arch); - SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->name); - SQL_BIND_INT64(stmts[STMT_INSERT_LINK], i, recno); - SQL_STEP(stmts[STMT_INSERT_LINK]); - sqlite3_reset(stmts[STMT_INSERT_LINK]); - } + for (mlink = mpage->mlinks; mlink; mlink = mlink->next) + dbadd_mlink(mlink); for (key = ohash_first(&strings, &slot); NULL != key; key = ohash_next(&strings, &slot)) { assert(key->mpage == mpage); - if (NULL == key->utf8) - utf8key(mc, key); + if (NULL == key->rendered) + render_key(mc, key); i = 1; SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, key->mask); - SQL_BIND_TEXT(stmts[STMT_INSERT_KEY], i, key->utf8); - SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, recno); + SQL_BIND_TEXT(stmts[STMT_INSERT_KEY], i, key->rendered); + SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, mpage->recno); SQL_STEP(stmts[STMT_INSERT_KEY]); sqlite3_reset(stmts[STMT_INSERT_KEY]); - if (key->utf8 != key->key) - free(key->utf8); + if (key->rendered != key->key) + free(key->rendered); free(key); } - - SQL_EXEC("END TRANSACTION"); } static void @@ -1848,20 +1887,31 @@ dbprune(void) size_t i; unsigned int slot; - if (nodb) - return; + if (0 == nodb) + SQL_EXEC("BEGIN TRANSACTION"); - mpage = ohash_first(&mpages, &slot); - while (NULL != mpage) { + for (mpage = ohash_first(&mpages, &slot); NULL != mpage; + mpage = ohash_next(&mpages, &slot)) { mlink = mpage->mlinks; - i = 1; - SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE], i, mlink->file); - SQL_STEP(stmts[STMT_DELETE_PAGE]); - sqlite3_reset(stmts[STMT_DELETE_PAGE]); if (verb) - say(mlink->file, "Deleted from index"); - mpage = ohash_next(&mpages, &slot); + say(mlink->file, "Deleting from database"); + if (nodb) + continue; + for ( ; NULL != mlink; mlink = mlink->next) { + i = 1; + SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE], + i, mlink->dsec); + SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE], + i, mlink->arch); + SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE], + i, mlink->name); + SQL_STEP(stmts[STMT_DELETE_PAGE]); + sqlite3_reset(stmts[STMT_DELETE_PAGE]); + } } + + if (0 == nodb) + SQL_EXEC("END TRANSACTION"); } /* @@ -1872,6 +1922,8 @@ static void dbclose(int real) { size_t i; + int status; + pid_t child; if (nodb) return; @@ -1887,10 +1939,60 @@ dbclose(int real) if (real) return; - if (-1 == rename(MANDOC_DB "~", MANDOC_DB)) { + if ('\0' == *tempfilename) { + if (-1 == rename(MANDOC_DB "~", MANDOC_DB)) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say(MANDOC_DB, "%s", strerror(errno)); + } + return; + } + + switch (child = fork()) { + case (-1): exitcode = (int)MANDOCLEVEL_SYSERR; - say(MANDOC_DB, NULL); + say("fork cmp", "%s", strerror(errno)); + return; + case (0): + execlp("cmp", "cmp", "-s", + tempfilename, MANDOC_DB, NULL); + say("exec cmp", "%s", strerror(errno)); + exit(0); + default: + break; } + if (-1 == waitpid(child, &status, 0)) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say("wait cmp", "%s", strerror(errno)); + } else if (WIFSIGNALED(status)) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say("cmp", "Died from a signal"); + } else if (WEXITSTATUS(status)) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say(MANDOC_DB, + "Data changed, but cannot replace database"); + } + + *strrchr(tempfilename, '/') = '\0'; + switch (child = fork()) { + case (-1): + exitcode = (int)MANDOCLEVEL_SYSERR; + say("fork rm", "%s", strerror(errno)); + return; + case (0): + execlp("rm", "rm", "-rf", tempfilename, NULL); + say("exec rm", "%s", strerror(errno)); + exit((int)MANDOCLEVEL_SYSERR); + default: + break; + } + if (-1 == waitpid(child, &status, 0)) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say("wait rm", "%s", strerror(errno)); + } else if (WIFSIGNALED(status) || WEXITSTATUS(status)) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say(tempfilename, + "Cannot remove temporary directory"); + } } /* @@ -1904,91 +2006,99 @@ dbclose(int real) static int dbopen(int real) { - const char *file, *sql; + const char *sql; int rc, ofl; if (nodb) return(1); + *tempfilename = '\0'; ofl = SQLITE_OPEN_READWRITE; - if (0 == real) { - file = MANDOC_DB "~"; - if (-1 == remove(file) && ENOENT != errno) { + + if (real) { + rc = sqlite3_open_v2(MANDOC_DB, &db, ofl, NULL); + if (SQLITE_OK != rc) { exitcode = (int)MANDOCLEVEL_SYSERR; - say(file, NULL); + say(MANDOC_DB, "%s", sqlite3_errmsg(db)); return(0); } - ofl |= SQLITE_OPEN_EXCLUSIVE; - } else - file = MANDOC_DB; + goto prepare_statements; + } - rc = sqlite3_open_v2(file, &db, ofl, NULL); + ofl |= SQLITE_OPEN_CREATE | SQLITE_OPEN_EXCLUSIVE; + + remove(MANDOC_DB "~"); + rc = sqlite3_open_v2(MANDOC_DB "~", &db, ofl, NULL); if (SQLITE_OK == rc) - goto prepare_statements; - if (SQLITE_CANTOPEN != rc) { + goto create_tables; + if (MPARSE_QUICK & mparse_options) { exitcode = (int)MANDOCLEVEL_SYSERR; - say(file, NULL); + say(MANDOC_DB "~", "%s", sqlite3_errmsg(db)); return(0); } - sqlite3_close(db); - db = NULL; - - if (SQLITE_OK != (rc = sqlite3_open(file, &db))) { + if (strlcpy(tempfilename, "/tmp/mandocdb.XXXXXX", + sizeof(tempfilename)) >= sizeof(tempfilename)) { exitcode = (int)MANDOCLEVEL_SYSERR; - say(file, NULL); + say("/tmp/mandocdb.XXXXXX", "Filename too long"); return(0); } + if (NULL == mkdtemp(tempfilename)) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say(tempfilename, "%s", strerror(errno)); + return(0); + } + if (strlcat(tempfilename, "/" MANDOC_DB, + sizeof(tempfilename)) >= sizeof(tempfilename)) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say(tempfilename, "Filename too long"); + return(0); + } + rc = sqlite3_open_v2(tempfilename, &db, ofl, NULL); + if (SQLITE_OK != rc) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say(tempfilename, "%s", sqlite3_errmsg(db)); + return(0); + } - /* - * XXX The first three columns in table mpages are obsolete - * and only kept for backward compatibility - * until apropos(1) and friends have caught up. - */ +create_tables: sql = "CREATE TABLE \"mpages\" (\n" - " \"file\" TEXT NOT NULL,\n" - " \"sec\" TEXT NOT NULL,\n" - " \"arch\" TEXT NOT NULL,\n" - " \"desc\" TEXT NOT NULL,\n" " \"form\" INTEGER NOT NULL,\n" " \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n" ");\n" "\n" "CREATE TABLE \"mlinks\" (\n" - " \"file\" TEXT NOT NULL,\n" " \"sec\" TEXT NOT NULL,\n" " \"arch\" TEXT NOT NULL,\n" " \"name\" TEXT NOT NULL,\n" " \"pageid\" INTEGER NOT NULL REFERENCES mpages(id) " - "ON DELETE CASCADE,\n" - " \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n" + "ON DELETE CASCADE\n" ");\n" "\n" "CREATE TABLE \"keys\" (\n" " \"bits\" INTEGER NOT NULL,\n" " \"key\" TEXT NOT NULL,\n" " \"pageid\" INTEGER NOT NULL REFERENCES mpages(id) " - "ON DELETE CASCADE,\n" - " \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n" - ");\n" - "\n" - "CREATE INDEX \"key_index\" ON keys (key);\n"; + "ON DELETE CASCADE\n" + ");\n"; if (SQLITE_OK != sqlite3_exec(db, sql, NULL, NULL, NULL)) { exitcode = (int)MANDOCLEVEL_SYSERR; - say(file, "%s", sqlite3_errmsg(db)); + say(MANDOC_DB, "%s", sqlite3_errmsg(db)); return(0); } prepare_statements: SQL_EXEC("PRAGMA foreign_keys = ON"); - sql = "DELETE FROM mpages where file=?"; + sql = "DELETE FROM mpages WHERE id IN " + "(SELECT pageid FROM mlinks WHERE " + "sec=? AND arch=? AND name=?)"; sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_DELETE_PAGE], NULL); sql = "INSERT INTO mpages " - "(file,sec,arch,desc,form) VALUES (?,?,?,?,?)"; + "(form) VALUES (?)"; sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_PAGE], NULL); sql = "INSERT INTO mlinks " - "(file,sec,arch,name,pageid) VALUES (?,?,?,?,?)"; + "(sec,arch,name,pageid) VALUES (?,?,?,?)"; sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_LINK], NULL); sql = "INSERT INTO keys " "(bits,key,pageid) VALUES (?,?,?)";