Return to mandocdb.c CVS log | Up to [cvsweb.bsd.lv] / mandoc |
version 1.52, 2012/06/08 15:06:28 | version 1.125, 2014/04/03 15:37:19 | ||
---|---|---|---|
|
|
||
/* $Id$ */ | /* $Id$ */ | ||
/* | /* | ||
* Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> | * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> | ||
* Copyright (c) 2011, 2012 Ingo Schwarze <schwarze@openbsd.org> | * Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org> | ||
* | * | ||
* Permission to use, copy, modify, and distribute this software for any | * Permission to use, copy, modify, and distribute this software for any | ||
* purpose with or without fee is hereby granted, provided that the above | * purpose with or without fee is hereby granted, provided that the above | ||
|
|
||
#include "config.h" | #include "config.h" | ||
#endif | #endif | ||
#include <sys/param.h> | |||
#include <sys/stat.h> | #include <sys/stat.h> | ||
#include <sys/wait.h> | |||
#include <assert.h> | #include <assert.h> | ||
#include <ctype.h> | #include <ctype.h> | ||
|
|
||
#include <fcntl.h> | #include <fcntl.h> | ||
#include <fts.h> | #include <fts.h> | ||
#include <getopt.h> | #include <getopt.h> | ||
#include <limits.h> | |||
#include <stddef.h> | #include <stddef.h> | ||
#include <stdio.h> | |||
#include <stdint.h> | #include <stdint.h> | ||
#include <stdlib.h> | #include <stdlib.h> | ||
#include <string.h> | #include <string.h> | ||
#include <unistd.h> | #include <unistd.h> | ||
#ifdef HAVE_OHASH | |||
#include <ohash.h> | #include <ohash.h> | ||
#else | |||
#include "compat_ohash.h" | |||
#endif | |||
#include <sqlite3.h> | #include <sqlite3.h> | ||
#include "mdoc.h" | #include "mdoc.h" | ||
#include "man.h" | #include "man.h" | ||
#include "mandoc.h" | #include "mandoc.h" | ||
#include "mandocdb.h" | #include "mandoc_aux.h" | ||
#include "manpath.h" | #include "manpath.h" | ||
#include "mansearch.h" | |||
/* Post a warning to stderr. */ | extern int mansearch_keymax; | ||
#define WARNING(_f, _b, _fmt, _args...) \ | extern const char *const mansearch_keynames[]; | ||
do if (warnings) { \ | |||
fprintf(stderr, "%s: ", (_b)); \ | |||
fprintf(stderr, (_fmt), ##_args); \ | |||
if ('\0' != *(_f)) \ | |||
fprintf(stderr, ": %s", (_f)); \ | |||
fprintf(stderr, "\n"); \ | |||
} while (/* CONSTCOND */ 0) | |||
/* Post a "verbose" message to stderr. */ | |||
#define DEBUG(_f, _b, _fmt, _args...) \ | |||
do if (verb) { \ | |||
fprintf(stderr, "%s: ", (_b)); \ | |||
fprintf(stderr, (_fmt), ##_args); \ | |||
fprintf(stderr, ": %s\n", (_f)); \ | |||
} while (/* CONSTCOND */ 0) | |||
#define SQL_EXEC(_v) \ | #define SQL_EXEC(_v) \ | ||
if (SQLITE_OK != sqlite3_exec(db, (_v), NULL, NULL, NULL)) \ | if (SQLITE_OK != sqlite3_exec(db, (_v), NULL, NULL, NULL)) \ | ||
|
|
||
}; | }; | ||
enum form { | enum form { | ||
FORM_SRC, /* format is -man or -mdoc */ | FORM_NONE, /* format is unknown */ | ||
FORM_CAT, /* format is cat */ | FORM_SRC, /* format is -man or -mdoc */ | ||
FORM_NONE /* format is unknown */ | FORM_CAT /* format is cat */ | ||
}; | }; | ||
struct str { | struct str { | ||
char *utf8; /* key in UTF-8 form */ | char *rendered; /* key in UTF-8 or ASCII form */ | ||
const struct of *of; /* if set, the owning parse */ | const struct mpage *mpage; /* if set, the owning parse */ | ||
struct str *next; /* next in owning parse sequence */ | |||
uint64_t mask; /* bitmask in sequence */ | uint64_t mask; /* bitmask in sequence */ | ||
char key[]; /* the string itself */ | char key[]; /* may contain escape sequences */ | ||
}; | }; | ||
struct id { | struct inodev { | ||
ino_t ino; | ino_t st_ino; | ||
dev_t dev; | dev_t st_dev; | ||
}; | }; | ||
struct of { | struct mpage { | ||
struct id id; /* used for hashing routine */ | struct inodev inodev; /* used for hashing routine */ | ||
struct of *next; /* next in ofs */ | int64_t recno; /* id in mpages SQL table */ | ||
enum form dform; /* path-cued form */ | enum form form; /* format from file content */ | ||
enum form sform; /* suffix-cued form */ | char *sec; /* section from file content */ | ||
char file[MAXPATHLEN]; /* filename rel. to manpath */ | char *arch; /* architecture from file content */ | ||
const char *desc; /* parsed description */ | char *title; /* title from file content */ | ||
const char *sec; /* suffix-cued section (or empty) */ | char *desc; /* description from file content */ | ||
const char *dsec; /* path-cued section (or empty) */ | struct mlink *mlinks; /* singly linked list */ | ||
const char *arch; /* path-cued arch. (or empty) */ | |||
const char *name; /* name (from filename) (not empty) */ | |||
}; | }; | ||
struct mlink { | |||
char file[PATH_MAX]; /* filename rel. to manpath */ | |||
enum form dform; /* format from directory */ | |||
enum form fform; /* format from file name suffix */ | |||
char *dsec; /* section from directory */ | |||
char *arch; /* architecture from directory */ | |||
char *name; /* name from file name (not empty) */ | |||
char *fsec; /* section from file name suffix */ | |||
struct mlink *next; /* singly linked list */ | |||
struct mpage *mpage; /* parent */ | |||
int gzip; /* filename has a .gz suffix */ | |||
}; | |||
enum stmt { | enum stmt { | ||
STMT_DELETE = 0, /* delete manpage */ | STMT_DELETE_PAGE = 0, /* delete mpage */ | ||
STMT_INSERT_DOC, /* insert manpage */ | STMT_INSERT_PAGE, /* insert mpage */ | ||
STMT_INSERT_KEY, /* insert parsed key */ | STMT_INSERT_LINK, /* insert mlink */ | ||
STMT_INSERT_KEY, /* insert parsed key */ | |||
STMT__MAX | STMT__MAX | ||
}; | }; | ||
typedef int (*mdoc_fp)(struct of *, const struct mdoc_node *); | typedef int (*mdoc_fp)(struct mpage *, const struct mdoc_node *); | ||
struct mdoc_handler { | struct mdoc_handler { | ||
mdoc_fp fp; /* optional handler */ | mdoc_fp fp; /* optional handler */ | ||
uint64_t mask; /* set unless handler returns 0 */ | uint64_t mask; /* set unless handler returns 0 */ | ||
int flags; /* for use by pmdoc_node */ | |||
#define MDOCF_CHILD 0x01 /* automatically index child nodes */ | |||
}; | }; | ||
static void dbclose(const char *, int); | static void dbclose(int); | ||
static void dbindex(struct mchars *, int, | static void dbadd(struct mpage *, struct mchars *); | ||
const struct of *, const char *); | static void dbadd_mlink(const struct mlink *mlink); | ||
static int dbopen(const char *, int); | static int dbopen(int); | ||
static void dbprune(const char *); | static void dbprune(void); | ||
static void fileadd(struct of *); | static void filescan(const char *); | ||
static int filecheck(const char *); | |||
static void filescan(const char *, const char *); | |||
static struct str *hashget(const char *, size_t); | |||
static void *hash_alloc(size_t, void *); | static void *hash_alloc(size_t, void *); | ||
static void hash_free(void *, size_t, void *); | static void hash_free(void *, size_t, void *); | ||
static void *hash_halloc(size_t, void *); | static void *hash_halloc(size_t, void *); | ||
static void inoadd(const struct stat *, struct of *); | static void mlink_add(struct mlink *, const struct stat *); | ||
static int inocheck(const struct stat *); | static int mlink_check(struct mpage *, struct mlink *); | ||
static void ofadd(const char *, int, const char *, | static void mlink_free(struct mlink *); | ||
const char *, const char *, const char *, | static void mlinks_undupe(struct mpage *); | ||
const char *, const struct stat *); | static void mpages_free(void); | ||
static void offree(void); | static void mpages_merge(struct mchars *, struct mparse *); | ||
static int ofmerge(struct mchars *, struct mparse *, const char *); | static void parse_cat(struct mpage *, int); | ||
static void parse_catpage(struct of *, const char *); | static void parse_man(struct mpage *, const struct man_node *); | ||
static int parse_man(struct of *, | static void parse_mdoc(struct mpage *, const struct mdoc_node *); | ||
const struct man_node *); | static int parse_mdoc_body(struct mpage *, const struct mdoc_node *); | ||
static void parse_mdoc(struct of *, const struct mdoc_node *); | static int parse_mdoc_head(struct mpage *, const struct mdoc_node *); | ||
static int parse_mdoc_body(struct of *, const struct mdoc_node *); | static int parse_mdoc_Fd(struct mpage *, const struct mdoc_node *); | ||
static int parse_mdoc_head(struct of *, const struct mdoc_node *); | static int parse_mdoc_Fn(struct mpage *, const struct mdoc_node *); | ||
static int parse_mdoc_Fd(struct of *, const struct mdoc_node *); | static int parse_mdoc_Nd(struct mpage *, const struct mdoc_node *); | ||
static int parse_mdoc_Fn(struct of *, const struct mdoc_node *); | static int parse_mdoc_Nm(struct mpage *, const struct mdoc_node *); | ||
static int parse_mdoc_In(struct of *, const struct mdoc_node *); | static int parse_mdoc_Sh(struct mpage *, const struct mdoc_node *); | ||
static int parse_mdoc_Nd(struct of *, const struct mdoc_node *); | static int parse_mdoc_Xr(struct mpage *, const struct mdoc_node *); | ||
static int parse_mdoc_Nm(struct of *, const struct mdoc_node *); | static void putkey(const struct mpage *, char *, uint64_t); | ||
static int parse_mdoc_Sh(struct of *, const struct mdoc_node *); | static void putkeys(const struct mpage *, | ||
static int parse_mdoc_St(struct of *, const struct mdoc_node *); | const char *, size_t, uint64_t); | ||
static int parse_mdoc_Xr(struct of *, const struct mdoc_node *); | static void putmdockey(const struct mpage *, | ||
static int path_reset(const char *, int, const char *); | |||
static void putkey(const struct of *, | |||
const char *, uint64_t); | |||
static void putkeys(const struct of *, | |||
const char *, int, uint64_t); | |||
static void putmdockey(const struct of *, | |||
const struct mdoc_node *, uint64_t); | const struct mdoc_node *, uint64_t); | ||
static char *stradd(const char *); | static void render_key(struct mchars *, struct str *); | ||
static char *straddbuf(const char *, size_t); | static void say(const char *, const char *, ...); | ||
static int treescan(const char *); | static int set_basedir(const char *); | ||
static int treescan(void); | |||
static size_t utf8(unsigned int, char [7]); | static size_t utf8(unsigned int, char [7]); | ||
static void utf8key(struct mchars *, struct str *); | |||
static void wordaddbuf(const struct of *, | |||
const char *, size_t, uint64_t); | |||
static char tempfilename[32]; | |||
static char *progname; | static char *progname; | ||
static int use_all; /* use all found files */ | |||
static int nodb; /* no database changes */ | static int nodb; /* no database changes */ | ||
static int verb; /* print what we're doing */ | static int mparse_options; /* abort the parse early */ | ||
static int use_all; /* use all found files */ | |||
static int debug; /* print what we're doing */ | |||
static int warnings; /* warn about crap */ | static int warnings; /* warn about crap */ | ||
static int write_utf8; /* write UTF-8 output; else ASCII */ | |||
static int exitcode; /* to be returned by main */ | |||
static enum op op; /* operational mode */ | static enum op op; /* operational mode */ | ||
static struct ohash inos; /* table of inodes/devices */ | static char basedir[PATH_MAX]; /* current base directory */ | ||
static struct ohash filenames; /* table of filenames */ | static struct ohash mpages; /* table of distinct manual pages */ | ||
static struct ohash mlinks; /* table of directory entries */ | |||
static struct ohash strings; /* table of all strings */ | static struct ohash strings; /* table of all strings */ | ||
static struct of *ofs = NULL; /* vector of files to parse */ | |||
static struct str *words = NULL; /* word list in current parse */ | |||
static sqlite3 *db = NULL; /* current database */ | static sqlite3 *db = NULL; /* current database */ | ||
static sqlite3_stmt *stmts[STMT__MAX]; /* current statements */ | static sqlite3_stmt *stmts[STMT__MAX]; /* current statements */ | ||
static const struct mdoc_handler mdocs[MDOC_MAX] = { | static const struct mdoc_handler mdocs[MDOC_MAX] = { | ||
{ NULL, 0, 0 }, /* Ap */ | { NULL, 0 }, /* Ap */ | ||
{ NULL, 0, 0 }, /* Dd */ | { NULL, 0 }, /* Dd */ | ||
{ NULL, 0, 0 }, /* Dt */ | { NULL, 0 }, /* Dt */ | ||
{ NULL, 0, 0 }, /* Os */ | { NULL, 0 }, /* Os */ | ||
{ parse_mdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */ | { parse_mdoc_Sh, TYPE_Sh }, /* Sh */ | ||
{ parse_mdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */ | { parse_mdoc_head, TYPE_Ss }, /* Ss */ | ||
{ NULL, 0, 0 }, /* Pp */ | { NULL, 0 }, /* Pp */ | ||
{ NULL, 0, 0 }, /* D1 */ | { NULL, 0 }, /* D1 */ | ||
{ NULL, 0, 0 }, /* Dl */ | { NULL, 0 }, /* Dl */ | ||
{ NULL, 0, 0 }, /* Bd */ | { NULL, 0 }, /* Bd */ | ||
{ NULL, 0, 0 }, /* Ed */ | { NULL, 0 }, /* Ed */ | ||
{ NULL, 0, 0 }, /* Bl */ | { NULL, 0 }, /* Bl */ | ||
{ NULL, 0, 0 }, /* El */ | { NULL, 0 }, /* El */ | ||
{ NULL, 0, 0 }, /* It */ | { NULL, 0 }, /* It */ | ||
{ NULL, 0, 0 }, /* Ad */ | { NULL, 0 }, /* Ad */ | ||
{ NULL, TYPE_An, MDOCF_CHILD }, /* An */ | { NULL, TYPE_An }, /* An */ | ||
{ NULL, TYPE_Ar, MDOCF_CHILD }, /* Ar */ | { NULL, TYPE_Ar }, /* Ar */ | ||
{ NULL, TYPE_Cd, MDOCF_CHILD }, /* Cd */ | { NULL, TYPE_Cd }, /* Cd */ | ||
{ NULL, TYPE_Cm, MDOCF_CHILD }, /* Cm */ | { NULL, TYPE_Cm }, /* Cm */ | ||
{ NULL, TYPE_Dv, MDOCF_CHILD }, /* Dv */ | { NULL, TYPE_Dv }, /* Dv */ | ||
{ NULL, TYPE_Er, MDOCF_CHILD }, /* Er */ | { NULL, TYPE_Er }, /* Er */ | ||
{ NULL, TYPE_Ev, MDOCF_CHILD }, /* Ev */ | { NULL, TYPE_Ev }, /* Ev */ | ||
{ NULL, 0, 0 }, /* Ex */ | { NULL, 0 }, /* Ex */ | ||
{ NULL, TYPE_Fa, MDOCF_CHILD }, /* Fa */ | { NULL, TYPE_Fa }, /* Fa */ | ||
{ parse_mdoc_Fd, TYPE_In, 0 }, /* Fd */ | { parse_mdoc_Fd, 0 }, /* Fd */ | ||
{ NULL, TYPE_Fl, MDOCF_CHILD }, /* Fl */ | { NULL, TYPE_Fl }, /* Fl */ | ||
{ parse_mdoc_Fn, 0, 0 }, /* Fn */ | { parse_mdoc_Fn, 0 }, /* Fn */ | ||
{ NULL, TYPE_Ft, MDOCF_CHILD }, /* Ft */ | { NULL, TYPE_Ft }, /* Ft */ | ||
{ NULL, TYPE_Ic, MDOCF_CHILD }, /* Ic */ | { NULL, TYPE_Ic }, /* Ic */ | ||
{ parse_mdoc_In, TYPE_In, MDOCF_CHILD }, /* In */ | { NULL, TYPE_In }, /* In */ | ||
{ NULL, TYPE_Li, MDOCF_CHILD }, /* Li */ | { NULL, TYPE_Li }, /* Li */ | ||
{ parse_mdoc_Nd, TYPE_Nd, MDOCF_CHILD }, /* Nd */ | { parse_mdoc_Nd, TYPE_Nd }, /* Nd */ | ||
{ parse_mdoc_Nm, TYPE_Nm, MDOCF_CHILD }, /* Nm */ | { parse_mdoc_Nm, TYPE_Nm }, /* Nm */ | ||
{ NULL, 0, 0 }, /* Op */ | { NULL, 0 }, /* Op */ | ||
{ NULL, 0, 0 }, /* Ot */ | { NULL, 0 }, /* Ot */ | ||
{ NULL, TYPE_Pa, MDOCF_CHILD }, /* Pa */ | { NULL, TYPE_Pa }, /* Pa */ | ||
{ NULL, 0, 0 }, /* Rv */ | { NULL, 0 }, /* Rv */ | ||
{ parse_mdoc_St, TYPE_St, 0 }, /* St */ | { NULL, TYPE_St }, /* St */ | ||
{ NULL, TYPE_Va, MDOCF_CHILD }, /* Va */ | { NULL, TYPE_Va }, /* Va */ | ||
{ parse_mdoc_body, TYPE_Va, MDOCF_CHILD }, /* Vt */ | { parse_mdoc_body, TYPE_Va }, /* Vt */ | ||
{ parse_mdoc_Xr, TYPE_Xr, 0 }, /* Xr */ | { parse_mdoc_Xr, 0 }, /* Xr */ | ||
{ NULL, 0, 0 }, /* %A */ | { NULL, 0 }, /* %A */ | ||
{ NULL, 0, 0 }, /* %B */ | { NULL, 0 }, /* %B */ | ||
{ NULL, 0, 0 }, /* %D */ | { NULL, 0 }, /* %D */ | ||
{ NULL, 0, 0 }, /* %I */ | { NULL, 0 }, /* %I */ | ||
{ NULL, 0, 0 }, /* %J */ | { NULL, 0 }, /* %J */ | ||
{ NULL, 0, 0 }, /* %N */ | { NULL, 0 }, /* %N */ | ||
{ NULL, 0, 0 }, /* %O */ | { NULL, 0 }, /* %O */ | ||
{ NULL, 0, 0 }, /* %P */ | { NULL, 0 }, /* %P */ | ||
{ NULL, 0, 0 }, /* %R */ | { NULL, 0 }, /* %R */ | ||
{ NULL, 0, 0 }, /* %T */ | { NULL, 0 }, /* %T */ | ||
{ NULL, 0, 0 }, /* %V */ | { NULL, 0 }, /* %V */ | ||
{ NULL, 0, 0 }, /* Ac */ | { NULL, 0 }, /* Ac */ | ||
{ NULL, 0, 0 }, /* Ao */ | { NULL, 0 }, /* Ao */ | ||
{ NULL, 0, 0 }, /* Aq */ | { NULL, 0 }, /* Aq */ | ||
{ NULL, TYPE_At, MDOCF_CHILD }, /* At */ | { NULL, TYPE_At }, /* At */ | ||
{ NULL, 0, 0 }, /* Bc */ | { NULL, 0 }, /* Bc */ | ||
{ NULL, 0, 0 }, /* Bf */ | { NULL, 0 }, /* Bf */ | ||
{ NULL, 0, 0 }, /* Bo */ | { NULL, 0 }, /* Bo */ | ||
{ NULL, 0, 0 }, /* Bq */ | { NULL, 0 }, /* Bq */ | ||
{ NULL, TYPE_Bsx, MDOCF_CHILD }, /* Bsx */ | { NULL, TYPE_Bsx }, /* Bsx */ | ||
{ NULL, TYPE_Bx, MDOCF_CHILD }, /* Bx */ | { NULL, TYPE_Bx }, /* Bx */ | ||
{ NULL, 0, 0 }, /* Db */ | { NULL, 0 }, /* Db */ | ||
{ NULL, 0, 0 }, /* Dc */ | { NULL, 0 }, /* Dc */ | ||
{ NULL, 0, 0 }, /* Do */ | { NULL, 0 }, /* Do */ | ||
{ NULL, 0, 0 }, /* Dq */ | { NULL, 0 }, /* Dq */ | ||
{ NULL, 0, 0 }, /* Ec */ | { NULL, 0 }, /* Ec */ | ||
{ NULL, 0, 0 }, /* Ef */ | { NULL, 0 }, /* Ef */ | ||
{ NULL, TYPE_Em, MDOCF_CHILD }, /* Em */ | { NULL, TYPE_Em }, /* Em */ | ||
{ NULL, 0, 0 }, /* Eo */ | { NULL, 0 }, /* Eo */ | ||
{ NULL, TYPE_Fx, MDOCF_CHILD }, /* Fx */ | { NULL, TYPE_Fx }, /* Fx */ | ||
{ NULL, TYPE_Ms, MDOCF_CHILD }, /* Ms */ | { NULL, TYPE_Ms }, /* Ms */ | ||
{ NULL, 0, 0 }, /* No */ | { NULL, 0 }, /* No */ | ||
{ NULL, 0, 0 }, /* Ns */ | { NULL, 0 }, /* Ns */ | ||
{ NULL, TYPE_Nx, MDOCF_CHILD }, /* Nx */ | { NULL, TYPE_Nx }, /* Nx */ | ||
{ NULL, TYPE_Ox, MDOCF_CHILD }, /* Ox */ | { NULL, TYPE_Ox }, /* Ox */ | ||
{ NULL, 0, 0 }, /* Pc */ | { NULL, 0 }, /* Pc */ | ||
{ NULL, 0, 0 }, /* Pf */ | { NULL, 0 }, /* Pf */ | ||
{ NULL, 0, 0 }, /* Po */ | { NULL, 0 }, /* Po */ | ||
{ NULL, 0, 0 }, /* Pq */ | { NULL, 0 }, /* Pq */ | ||
{ NULL, 0, 0 }, /* Qc */ | { NULL, 0 }, /* Qc */ | ||
{ NULL, 0, 0 }, /* Ql */ | { NULL, 0 }, /* Ql */ | ||
{ NULL, 0, 0 }, /* Qo */ | { NULL, 0 }, /* Qo */ | ||
{ NULL, 0, 0 }, /* Qq */ | { NULL, 0 }, /* Qq */ | ||
{ NULL, 0, 0 }, /* Re */ | { NULL, 0 }, /* Re */ | ||
{ NULL, 0, 0 }, /* Rs */ | { NULL, 0 }, /* Rs */ | ||
{ NULL, 0, 0 }, /* Sc */ | { NULL, 0 }, /* Sc */ | ||
{ NULL, 0, 0 }, /* So */ | { NULL, 0 }, /* So */ | ||
{ NULL, 0, 0 }, /* Sq */ | { NULL, 0 }, /* Sq */ | ||
{ NULL, 0, 0 }, /* Sm */ | { NULL, 0 }, /* Sm */ | ||
{ NULL, 0, 0 }, /* Sx */ | { NULL, 0 }, /* Sx */ | ||
{ NULL, TYPE_Sy, MDOCF_CHILD }, /* Sy */ | { NULL, TYPE_Sy }, /* Sy */ | ||
{ NULL, TYPE_Tn, MDOCF_CHILD }, /* Tn */ | { NULL, TYPE_Tn }, /* Tn */ | ||
{ NULL, 0, 0 }, /* Ux */ | { NULL, 0 }, /* Ux */ | ||
{ NULL, 0, 0 }, /* Xc */ | { NULL, 0 }, /* Xc */ | ||
{ NULL, 0, 0 }, /* Xo */ | { NULL, 0 }, /* Xo */ | ||
{ parse_mdoc_head, TYPE_Fn, 0 }, /* Fo */ | { parse_mdoc_head, 0 }, /* Fo */ | ||
{ NULL, 0, 0 }, /* Fc */ | { NULL, 0 }, /* Fc */ | ||
{ NULL, 0, 0 }, /* Oo */ | { NULL, 0 }, /* Oo */ | ||
{ NULL, 0, 0 }, /* Oc */ | { NULL, 0 }, /* Oc */ | ||
{ NULL, 0, 0 }, /* Bk */ | { NULL, 0 }, /* Bk */ | ||
{ NULL, 0, 0 }, /* Ek */ | { NULL, 0 }, /* Ek */ | ||
{ NULL, 0, 0 }, /* Bt */ | { NULL, 0 }, /* Bt */ | ||
{ NULL, 0, 0 }, /* Hf */ | { NULL, 0 }, /* Hf */ | ||
{ NULL, 0, 0 }, /* Fr */ | { NULL, 0 }, /* Fr */ | ||
{ NULL, 0, 0 }, /* Ud */ | { NULL, 0 }, /* Ud */ | ||
{ NULL, TYPE_Lb, MDOCF_CHILD }, /* Lb */ | { NULL, TYPE_Lb }, /* Lb */ | ||
{ NULL, 0, 0 }, /* Lp */ | { NULL, 0 }, /* Lp */ | ||
{ NULL, TYPE_Lk, MDOCF_CHILD }, /* Lk */ | { NULL, TYPE_Lk }, /* Lk */ | ||
{ NULL, TYPE_Mt, MDOCF_CHILD }, /* Mt */ | { NULL, TYPE_Mt }, /* Mt */ | ||
{ NULL, 0, 0 }, /* Brq */ | { NULL, 0 }, /* Brq */ | ||
{ NULL, 0, 0 }, /* Bro */ | { NULL, 0 }, /* Bro */ | ||
{ NULL, 0, 0 }, /* Brc */ | { NULL, 0 }, /* Brc */ | ||
{ NULL, 0, 0 }, /* %C */ | { NULL, 0 }, /* %C */ | ||
{ NULL, 0, 0 }, /* Es */ | { NULL, 0 }, /* Es */ | ||
{ NULL, 0, 0 }, /* En */ | { NULL, 0 }, /* En */ | ||
{ NULL, TYPE_Dx, MDOCF_CHILD }, /* Dx */ | { NULL, TYPE_Dx }, /* Dx */ | ||
{ NULL, 0, 0 }, /* %Q */ | { NULL, 0 }, /* %Q */ | ||
{ NULL, 0, 0 }, /* br */ | { NULL, 0 }, /* br */ | ||
{ NULL, 0, 0 }, /* sp */ | { NULL, 0 }, /* sp */ | ||
{ NULL, 0, 0 }, /* %U */ | { NULL, 0 }, /* %U */ | ||
{ NULL, 0, 0 }, /* Ta */ | { NULL, 0 }, /* Ta */ | ||
}; | }; | ||
int | int | ||
main(int argc, char *argv[]) | main(int argc, char *argv[]) | ||
{ | { | ||
char cwd[MAXPATHLEN]; | int ch, i; | ||
int ch, rc, fd, i; | |||
size_t j, sz; | size_t j, sz; | ||
const char *dir; | const char *path_arg; | ||
struct str *s; | |||
struct mchars *mc; | struct mchars *mc; | ||
struct manpaths dirs; | struct manpaths dirs; | ||
struct mparse *mp; | struct mparse *mp; | ||
struct ohash_info ino_info, filename_info, str_info; | struct ohash_info mpages_info, mlinks_info; | ||
memset(stmts, 0, STMT__MAX * sizeof(sqlite3_stmt *)); | memset(stmts, 0, STMT__MAX * sizeof(sqlite3_stmt *)); | ||
memset(&dirs, 0, sizeof(struct manpaths)); | memset(&dirs, 0, sizeof(struct manpaths)); | ||
ino_info.halloc = filename_info.halloc = | mpages_info.alloc = mlinks_info.alloc = hash_alloc; | ||
str_info.halloc = hash_halloc; | mpages_info.halloc = mlinks_info.halloc = hash_halloc; | ||
ino_info.hfree = filename_info.hfree = | mpages_info.hfree = mlinks_info.hfree = hash_free; | ||
str_info.hfree = hash_free; | |||
ino_info.alloc = filename_info.alloc = | |||
str_info.alloc = hash_alloc; | |||
ino_info.key_offset = offsetof(struct of, id); | mpages_info.key_offset = offsetof(struct mpage, inodev); | ||
filename_info.key_offset = offsetof(struct of, file); | mlinks_info.key_offset = offsetof(struct mlink, file); | ||
str_info.key_offset = offsetof(struct str, key); | |||
progname = strrchr(argv[0], '/'); | progname = strrchr(argv[0], '/'); | ||
if (progname == NULL) | if (progname == NULL) | ||
|
|
||
++progname; | ++progname; | ||
/* | /* | ||
* Remember where we started by keeping a fd open to the origin | |||
* path component: throughout this utility, we chdir() a lot to | |||
* handle relative paths, and by doing this, we can return to | |||
* the starting point. | |||
*/ | |||
if (NULL == getcwd(cwd, MAXPATHLEN)) { | |||
perror(NULL); | |||
return(EXIT_FAILURE); | |||
} else if (-1 == (fd = open(cwd, O_RDONLY, 0))) { | |||
perror(cwd); | |||
return(EXIT_FAILURE); | |||
} | |||
/* | |||
* We accept a few different invocations. | * We accept a few different invocations. | ||
* The CHECKOP macro makes sure that invocation styles don't | * The CHECKOP macro makes sure that invocation styles don't | ||
* clobber each other. | * clobber each other. | ||
|
|
||
goto usage; \ | goto usage; \ | ||
} while (/*CONSTCOND*/0) | } while (/*CONSTCOND*/0) | ||
dir = NULL; | path_arg = NULL; | ||
op = OP_DEFAULT; | op = OP_DEFAULT; | ||
while (-1 != (ch = getopt(argc, argv, "aC:d:ntu:vW"))) | while (-1 != (ch = getopt(argc, argv, "aC:Dd:nQT:tu:W"))) | ||
switch (ch) { | switch (ch) { | ||
case ('a'): | case ('a'): | ||
use_all = 1; | use_all = 1; | ||
break; | break; | ||
case ('C'): | case ('C'): | ||
CHECKOP(op, ch); | CHECKOP(op, ch); | ||
dir = optarg; | path_arg = optarg; | ||
op = OP_CONFFILE; | op = OP_CONFFILE; | ||
break; | break; | ||
case ('D'): | |||
debug++; | |||
break; | |||
case ('d'): | case ('d'): | ||
CHECKOP(op, ch); | CHECKOP(op, ch); | ||
dir = optarg; | path_arg = optarg; | ||
op = OP_UPDATE; | op = OP_UPDATE; | ||
break; | break; | ||
case ('n'): | case ('n'): | ||
nodb = 1; | nodb = 1; | ||
break; | break; | ||
case ('Q'): | |||
mparse_options |= MPARSE_QUICK; | |||
break; | |||
case ('T'): | |||
if (strcmp(optarg, "utf8")) { | |||
fprintf(stderr, "-T%s: Unsupported " | |||
"output format\n", optarg); | |||
goto usage; | |||
} | |||
write_utf8 = 1; | |||
break; | |||
case ('t'): | case ('t'): | ||
CHECKOP(op, ch); | CHECKOP(op, ch); | ||
dup2(STDOUT_FILENO, STDERR_FILENO); | dup2(STDOUT_FILENO, STDERR_FILENO); | ||
|
|
||
break; | break; | ||
case ('u'): | case ('u'): | ||
CHECKOP(op, ch); | CHECKOP(op, ch); | ||
dir = optarg; | path_arg = optarg; | ||
op = OP_DELETE; | op = OP_DELETE; | ||
break; | break; | ||
case ('v'): | |||
verb++; | |||
break; | |||
case ('W'): | case ('W'): | ||
warnings = 1; | warnings = 1; | ||
break; | break; | ||
|
|
||
goto usage; | goto usage; | ||
} | } | ||
rc = 1; | exitcode = (int)MANDOCLEVEL_OK; | ||
mp = mparse_alloc(MPARSE_AUTO, | mp = mparse_alloc(mparse_options, MANDOCLEVEL_FATAL, NULL, NULL); | ||
MANDOCLEVEL_FATAL, NULL, NULL, NULL); | |||
mc = mchars_alloc(); | mc = mchars_alloc(); | ||
ohash_init(&strings, 6, &str_info); | ohash_init(&mpages, 6, &mpages_info); | ||
ohash_init(&inos, 6, &ino_info); | ohash_init(&mlinks, 6, &mlinks_info); | ||
ohash_init(&filenames, 6, &filename_info); | |||
if (OP_UPDATE == op || OP_DELETE == op || OP_TEST == op) { | if (OP_UPDATE == op || OP_DELETE == op || OP_TEST == op) { | ||
/* | /* | ||
* Force processing all files. | * Force processing all files. | ||
*/ | */ | ||
use_all = 1; | use_all = 1; | ||
if (NULL == dir) | |||
dir = cwd; | |||
/* | /* | ||
* All of these deal with a specific directory. | * All of these deal with a specific directory. | ||
* Jump into that directory then collect files specified | * Jump into that directory then collect files specified | ||
* on the command-line. | * on the command-line. | ||
*/ | */ | ||
if (0 == path_reset(cwd, fd, dir)) | if (0 == set_basedir(path_arg)) | ||
goto out; | goto out; | ||
for (i = 0; i < argc; i++) | for (i = 0; i < argc; i++) | ||
filescan(argv[i], dir); | filescan(argv[i]); | ||
if (0 == dbopen(dir, 1)) | if (0 == dbopen(1)) | ||
goto out; | goto out; | ||
if (OP_TEST != op) | if (OP_TEST != op) | ||
dbprune(dir); | dbprune(); | ||
if (OP_DELETE != op) | if (OP_DELETE != op) | ||
rc = ofmerge(mc, mp, dir); | mpages_merge(mc, mp); | ||
dbclose(dir, 1); | dbclose(1); | ||
} else { | } else { | ||
/* | /* | ||
* If we have arguments, use them as our manpaths. | * If we have arguments, use them as our manpaths. | ||
|
|
||
for (i = 0; i < argc; i++) | for (i = 0; i < argc; i++) | ||
dirs.paths[i] = mandoc_strdup(argv[i]); | dirs.paths[i] = mandoc_strdup(argv[i]); | ||
} else | } else | ||
manpath_parse(&dirs, dir, NULL, NULL); | manpath_parse(&dirs, path_arg, NULL, NULL); | ||
/* | /* | ||
* First scan the tree rooted at a base directory. | * First scan the tree rooted at a base directory, then | ||
* Then whack its database (if one exists), parse, and | * build a new database and finally move it into place. | ||
* build up the database. | |||
* Ignore zero-length directories and strip trailing | * Ignore zero-length directories and strip trailing | ||
* slashes. | * slashes. | ||
*/ | */ | ||
|
|
||
dirs.paths[j][--sz] = '\0'; | dirs.paths[j][--sz] = '\0'; | ||
if (0 == sz) | if (0 == sz) | ||
continue; | continue; | ||
if (0 == path_reset(cwd, fd, dirs.paths[j])) | |||
if (j) { | |||
ohash_init(&mpages, 6, &mpages_info); | |||
ohash_init(&mlinks, 6, &mlinks_info); | |||
} | |||
if (0 == set_basedir(dirs.paths[j])) | |||
goto out; | goto out; | ||
if (0 == treescan(dirs.paths[j])) | if (0 == treescan()) | ||
goto out; | goto out; | ||
if (0 == path_reset(cwd, fd, dirs.paths[j])) | if (0 == set_basedir(dirs.paths[j])) | ||
goto out; | goto out; | ||
if (0 == dbopen(dirs.paths[j], 0)) | if (0 == dbopen(0)) | ||
goto out; | goto out; | ||
/* | mpages_merge(mc, mp); | ||
* Since we're opening up a new database, we can | dbclose(0); | ||
* turn off synchronous mode for much better | |||
* performance. | |||
*/ | |||
SQL_EXEC("PRAGMA synchronous = OFF"); | |||
if (0 == ofmerge(mc, mp, dirs.paths[j])) | if (j + 1 < dirs.sz) { | ||
goto out; | mpages_free(); | ||
dbclose(dirs.paths[j], 0); | ohash_delete(&mpages); | ||
offree(); | ohash_delete(&mlinks); | ||
ohash_delete(&inos); | } | ||
ohash_init(&inos, 6, &ino_info); | |||
ohash_delete(&filenames); | |||
ohash_init(&filenames, 6, &filename_info); | |||
} | } | ||
} | } | ||
out: | out: | ||
close(fd); | set_basedir(NULL); | ||
manpath_free(&dirs); | manpath_free(&dirs); | ||
mchars_free(mc); | mchars_free(mc); | ||
mparse_free(mp); | mparse_free(mp); | ||
for (s = ohash_first(&strings, &ch); | mpages_free(); | ||
NULL != s; s = ohash_next(&strings, &ch)) { | ohash_delete(&mpages); | ||
if (s->utf8 != s->key) | ohash_delete(&mlinks); | ||
free(s->utf8); | return(exitcode); | ||
free(s); | |||
} | |||
ohash_delete(&strings); | |||
ohash_delete(&inos); | |||
ohash_delete(&filenames); | |||
offree(); | |||
return(rc ? EXIT_SUCCESS : EXIT_FAILURE); | |||
usage: | usage: | ||
fprintf(stderr, "usage: %s [-anvW] [-C file]\n" | fprintf(stderr, "usage: %s [-aDnQW] [-C file] [-Tutf8]\n" | ||
" %s [-anvW] dir ...\n" | " %s [-aDnQW] [-Tutf8] dir ...\n" | ||
" %s [-nvW] -d dir [file ...]\n" | " %s [-DnQW] [-Tutf8] -d dir [file ...]\n" | ||
" %s [-nvW] -u dir [file ...]\n" | " %s [-DnW] -u dir [file ...]\n" | ||
" %s -t file ...\n", | " %s [-Q] -t file ...\n", | ||
progname, progname, progname, | progname, progname, progname, | ||
progname, progname); | progname, progname); | ||
return(EXIT_FAILURE); | return((int)MANDOCLEVEL_BADARG); | ||
} | } | ||
/* | /* | ||
* Scan a directory tree rooted at "base" for manpages. | * Scan a directory tree rooted at "basedir" for manpages. | ||
* We use fts(), scanning directory parts along the way for clues to our | * We use fts(), scanning directory parts along the way for clues to our | ||
* section and architecture. | * section and architecture. | ||
* | * | ||
|
|
||
* TODO: accommodate for multi-language directories. | * TODO: accommodate for multi-language directories. | ||
*/ | */ | ||
static int | static int | ||
treescan(const char *base) | treescan(void) | ||
{ | { | ||
FTS *f; | FTS *f; | ||
FTSENT *ff; | FTSENT *ff; | ||
int dform; | struct mlink *mlink; | ||
char *sec; | int dform, gzip; | ||
const char *dsec, *arch, *cp, *name, *path; | char *dsec, *arch, *fsec, *cp; | ||
const char *path; | |||
const char *argv[2]; | const char *argv[2]; | ||
argv[0] = "."; | argv[0] = "."; | ||
|
|
||
*/ | */ | ||
f = fts_open((char * const *)argv, FTS_LOGICAL, NULL); | f = fts_open((char * const *)argv, FTS_LOGICAL, NULL); | ||
if (NULL == f) { | if (NULL == f) { | ||
perror(base); | exitcode = (int)MANDOCLEVEL_SYSERR; | ||
say("", "&fts_open"); | |||
return(0); | return(0); | ||
} | } | ||
|
|
||
while (NULL != (ff = fts_read(f))) { | while (NULL != (ff = fts_read(f))) { | ||
path = ff->fts_path + 2; | path = ff->fts_path + 2; | ||
/* | /* | ||
* If we're a regular file, add an "of" by using the | * If we're a regular file, add an mlink by using the | ||
* stored directory data and handling the filename. | * stored directory data and handling the filename. | ||
* Disallow duplicate (hard-linked) files. | |||
*/ | */ | ||
if (FTS_F == ff->fts_info) { | if (FTS_F == ff->fts_info) { | ||
if (0 == strcmp(path, MANDOC_DB)) | |||
continue; | |||
if ( ! use_all && ff->fts_level < 2) { | if ( ! use_all && ff->fts_level < 2) { | ||
WARNING(path, base, "Extraneous file"); | if (warnings) | ||
say(path, "Extraneous file"); | |||
continue; | continue; | ||
} else if (inocheck(ff->fts_statp)) { | } | ||
WARNING(path, base, "Duplicate file"); | gzip = 0; | ||
continue; | fsec = NULL; | ||
} | while (NULL == fsec) { | ||
fsec = strrchr(ff->fts_name, '.'); | |||
cp = ff->fts_name; | if (NULL == fsec || strcmp(fsec+1, "gz")) | ||
break; | |||
if (0 == strcmp(cp, "mandocdb.db")) { | gzip = 1; | ||
WARNING(path, base, "Skip database"); | *fsec = '\0'; | ||
continue; | fsec = NULL; | ||
} else if (NULL != (cp = strrchr(cp, '.'))) { | } | ||
if (0 == strcmp(cp + 1, "html")) { | if (NULL == fsec) { | ||
WARNING(path, base, "Skip html"); | if ( ! use_all) { | ||
if (warnings) | |||
say(path, | |||
"No filename suffix"); | |||
continue; | continue; | ||
} else if (0 == strcmp(cp + 1, "gz")) { | |||
WARNING(path, base, "Skip gz"); | |||
continue; | |||
} else if (0 == strcmp(cp + 1, "ps")) { | |||
WARNING(path, base, "Skip ps"); | |||
continue; | |||
} else if (0 == strcmp(cp + 1, "pdf")) { | |||
WARNING(path, base, "Skip pdf"); | |||
continue; | |||
} | } | ||
} | } else if (0 == strcmp(++fsec, "html")) { | ||
if (warnings) | |||
say(path, "Skip html"); | |||
continue; | |||
} else if (0 == strcmp(fsec, "ps")) { | |||
if (warnings) | |||
say(path, "Skip ps"); | |||
continue; | |||
} else if (0 == strcmp(fsec, "pdf")) { | |||
if (warnings) | |||
say(path, "Skip pdf"); | |||
continue; | |||
} else if ( ! use_all && | |||
((FORM_SRC == dform && strcmp(fsec, dsec)) || | |||
(FORM_CAT == dform && strcmp(fsec, "0")))) { | |||
if (warnings) | |||
say(path, "Wrong filename suffix"); | |||
continue; | |||
} else | |||
fsec[-1] = '\0'; | |||
if (NULL != (sec = strrchr(ff->fts_name, '.'))) { | mlink = mandoc_calloc(1, sizeof(struct mlink)); | ||
*sec = '\0'; | strlcpy(mlink->file, path, sizeof(mlink->file)); | ||
sec = stradd(sec + 1); | mlink->dform = dform; | ||
} | mlink->dsec = dsec; | ||
name = stradd(ff->fts_name); | mlink->arch = arch; | ||
ofadd(base, dform, path, | mlink->name = ff->fts_name; | ||
name, dsec, sec, arch, ff->fts_statp); | mlink->fsec = fsec; | ||
mlink->gzip = gzip; | |||
mlink_add(mlink, ff->fts_statp); | |||
continue; | continue; | ||
} else if (FTS_D != ff->fts_info && | } else if (FTS_D != ff->fts_info && | ||
FTS_DP != ff->fts_info) | FTS_DP != ff->fts_info) { | ||
if (warnings) | |||
say(path, "Not a regular file"); | |||
continue; | continue; | ||
} | |||
switch (ff->fts_level) { | switch (ff->fts_level) { | ||
case (0): | case (0): | ||
|
|
||
* Try to infer this from the name. | * Try to infer this from the name. | ||
* If we're not in use_all, enforce it. | * If we're not in use_all, enforce it. | ||
*/ | */ | ||
dsec = NULL; | |||
dform = FORM_NONE; | |||
cp = ff->fts_name; | cp = ff->fts_name; | ||
if (FTS_DP == ff->fts_info) | if (FTS_DP == ff->fts_info) | ||
break; | break; | ||
if (0 == strncmp(cp, "man", 3)) { | if (0 == strncmp(cp, "man", 3)) { | ||
dform = FORM_SRC; | dform = FORM_SRC; | ||
dsec = stradd(cp + 3); | dsec = cp + 3; | ||
} else if (0 == strncmp(cp, "cat", 3)) { | } else if (0 == strncmp(cp, "cat", 3)) { | ||
dform = FORM_CAT; | dform = FORM_CAT; | ||
dsec = stradd(cp + 3); | dsec = cp + 3; | ||
} else { | |||
dform = FORM_NONE; | |||
dsec = NULL; | |||
} | } | ||
if (NULL != dsec || use_all) | if (NULL != dsec || use_all) | ||
break; | break; | ||
WARNING(path, base, "Unknown directory part"); | if (warnings) | ||
say(path, "Unknown directory part"); | |||
fts_set(f, ff, FTS_SKIP); | fts_set(f, ff, FTS_SKIP); | ||
break; | break; | ||
case (2): | case (2): | ||
|
|
||
* Possibly our architecture. | * Possibly our architecture. | ||
* If we're descending, keep tabs on it. | * If we're descending, keep tabs on it. | ||
*/ | */ | ||
arch = NULL; | |||
if (FTS_DP != ff->fts_info && NULL != dsec) | if (FTS_DP != ff->fts_info && NULL != dsec) | ||
arch = stradd(ff->fts_name); | arch = ff->fts_name; | ||
else | |||
arch = NULL; | |||
break; | break; | ||
default: | default: | ||
if (FTS_DP == ff->fts_info || use_all) | if (FTS_DP == ff->fts_info || use_all) | ||
break; | break; | ||
WARNING(path, base, "Extraneous directory part"); | if (warnings) | ||
say(path, "Extraneous directory part"); | |||
fts_set(f, ff, FTS_SKIP); | fts_set(f, ff, FTS_SKIP); | ||
break; | break; | ||
} | } | ||
|
|
||
} | } | ||
/* | /* | ||
* Add a file to the file vector. | * Add a file to the mlinks table. | ||
* Do not verify that it's a "valid" looking manpage (we'll do that | * Do not verify that it's a "valid" looking manpage (we'll do that | ||
* later). | * later). | ||
* | * | ||
|
|
||
* or | * or | ||
* [./]cat<section>[/<arch>]/<name>.0 | * [./]cat<section>[/<arch>]/<name>.0 | ||
* | * | ||
* Stuff this information directly into the "of" vector. | |||
* See treescan() for the fts(3) version of this. | * See treescan() for the fts(3) version of this. | ||
*/ | */ | ||
static void | static void | ||
filescan(const char *file, const char *base) | filescan(const char *file) | ||
{ | { | ||
const char *sec, *arch, *name, *dsec; | char buf[PATH_MAX]; | ||
char *p, *start, *buf; | |||
int dform; | |||
struct stat st; | struct stat st; | ||
struct mlink *mlink; | |||
char *p, *start; | |||
assert(use_all); | assert(use_all); | ||
if (0 == strncmp(file, "./", 2)) | if (0 == strncmp(file, "./", 2)) | ||
file += 2; | file += 2; | ||
if (-1 == stat(file, &st)) { | if (NULL == realpath(file, buf)) { | ||
WARNING(file, base, "%s", strerror(errno)); | exitcode = (int)MANDOCLEVEL_BADARG; | ||
say(file, "&realpath"); | |||
return; | return; | ||
} else if ( ! (S_IFREG & st.st_mode)) { | } | ||
WARNING(file, base, "Not a regular file"); | |||
if (strstr(buf, basedir) == buf) | |||
start = buf + strlen(basedir) + 1; | |||
else if (OP_TEST == op) | |||
start = buf; | |||
else { | |||
exitcode = (int)MANDOCLEVEL_BADARG; | |||
say("", "%s: outside base directory", buf); | |||
return; | return; | ||
} else if (inocheck(&st)) { | } | ||
WARNING(file, base, "Duplicate file"); | |||
if (-1 == stat(buf, &st)) { | |||
exitcode = (int)MANDOCLEVEL_BADARG; | |||
say(file, "&stat"); | |||
return; | return; | ||
} else if ( ! (S_IFREG & st.st_mode)) { | |||
exitcode = (int)MANDOCLEVEL_BADARG; | |||
say(file, "Not a regular file"); | |||
return; | |||
} | } | ||
buf = mandoc_strdup(file); | mlink = mandoc_calloc(1, sizeof(struct mlink)); | ||
start = buf; | strlcpy(mlink->file, start, sizeof(mlink->file)); | ||
sec = arch = name = dsec = NULL; | |||
dform = FORM_NONE; | |||
/* | /* | ||
* First try to guess our directory structure. | * First try to guess our directory structure. | ||
|
|
||
if (NULL != (p = strchr(start, '/'))) { | if (NULL != (p = strchr(start, '/'))) { | ||
*p++ = '\0'; | *p++ = '\0'; | ||
if (0 == strncmp(start, "man", 3)) { | if (0 == strncmp(start, "man", 3)) { | ||
dform = FORM_SRC; | mlink->dform = FORM_SRC; | ||
dsec = start + 3; | mlink->dsec = start + 3; | ||
} else if (0 == strncmp(start, "cat", 3)) { | } else if (0 == strncmp(start, "cat", 3)) { | ||
dform = FORM_CAT; | mlink->dform = FORM_CAT; | ||
dsec = start + 3; | mlink->dsec = start + 3; | ||
} | } | ||
start = p; | start = p; | ||
if (NULL != dsec && NULL != (p = strchr(start, '/'))) { | if (NULL != mlink->dsec && NULL != (p = strchr(start, '/'))) { | ||
*p++ = '\0'; | *p++ = '\0'; | ||
arch = start; | mlink->arch = start; | ||
start = p; | start = p; | ||
} | } | ||
} | } | ||
/* | /* | ||
|
|
||
if ('.' == *p) { | if ('.' == *p) { | ||
*p++ = '\0'; | *p++ = '\0'; | ||
sec = p; | mlink->fsec = p; | ||
} | } | ||
/* | /* | ||
* Now try to parse the name. | * Now try to parse the name. | ||
* Use the filename portion of the path. | * Use the filename portion of the path. | ||
*/ | */ | ||
name = start; | mlink->name = start; | ||
if (NULL != (p = strrchr(start, '/'))) { | if (NULL != (p = strrchr(start, '/'))) { | ||
name = p + 1; | mlink->name = p + 1; | ||
*p = '\0'; | *p = '\0'; | ||
} | } | ||
mlink_add(mlink, &st); | |||
ofadd(base, dform, file, name, dsec, sec, arch, &st); | |||
free(buf); | |||
} | } | ||
/* | static void | ||
* See fileadd(). | mlink_add(struct mlink *mlink, const struct stat *st) | ||
*/ | |||
static int | |||
filecheck(const char *name) | |||
{ | { | ||
unsigned int index; | struct inodev inodev; | ||
struct mpage *mpage; | |||
unsigned int slot; | |||
index = ohash_qlookup(&filenames, name); | assert(NULL != mlink->file); | ||
return(NULL != ohash_find(&filenames, index)); | |||
mlink->dsec = mandoc_strdup(mlink->dsec ? mlink->dsec : ""); | |||
mlink->arch = mandoc_strdup(mlink->arch ? mlink->arch : ""); | |||
mlink->name = mandoc_strdup(mlink->name ? mlink->name : ""); | |||
mlink->fsec = mandoc_strdup(mlink->fsec ? mlink->fsec : ""); | |||
if ('0' == *mlink->fsec) { | |||
free(mlink->fsec); | |||
mlink->fsec = mandoc_strdup(mlink->dsec); | |||
mlink->fform = FORM_CAT; | |||
} else if ('1' <= *mlink->fsec && '9' >= *mlink->fsec) | |||
mlink->fform = FORM_SRC; | |||
else | |||
mlink->fform = FORM_NONE; | |||
slot = ohash_qlookup(&mlinks, mlink->file); | |||
assert(NULL == ohash_find(&mlinks, slot)); | |||
ohash_insert(&mlinks, slot, mlink); | |||
inodev.st_ino = st->st_ino; | |||
inodev.st_dev = st->st_dev; | |||
slot = ohash_lookup_memory(&mpages, (char *)&inodev, | |||
sizeof(struct inodev), inodev.st_ino); | |||
mpage = ohash_find(&mpages, slot); | |||
if (NULL == mpage) { | |||
mpage = mandoc_calloc(1, sizeof(struct mpage)); | |||
mpage->inodev.st_ino = inodev.st_ino; | |||
mpage->inodev.st_dev = inodev.st_dev; | |||
ohash_insert(&mpages, slot, mpage); | |||
} else | |||
mlink->next = mpage->mlinks; | |||
mpage->mlinks = mlink; | |||
mlink->mpage = mpage; | |||
} | } | ||
/* | |||
* Use the standard hashing mechanism (K&R) to see if the given filename | |||
* already exists. | |||
*/ | |||
static void | static void | ||
fileadd(struct of *of) | mlink_free(struct mlink *mlink) | ||
{ | { | ||
unsigned int index; | |||
index = ohash_qlookup(&filenames, of->file); | free(mlink->dsec); | ||
assert(NULL == ohash_find(&filenames, index)); | free(mlink->arch); | ||
ohash_insert(&filenames, index, of); | free(mlink->name); | ||
free(mlink->fsec); | |||
free(mlink); | |||
} | } | ||
/* | static void | ||
* See inoadd(). | mpages_free(void) | ||
*/ | |||
static int | |||
inocheck(const struct stat *st) | |||
{ | { | ||
struct id id; | struct mpage *mpage; | ||
uint32_t hash; | struct mlink *mlink; | ||
unsigned int index; | unsigned int slot; | ||
memset(&id, 0, sizeof(id)); | mpage = ohash_first(&mpages, &slot); | ||
id.ino = hash = st->st_ino; | while (NULL != mpage) { | ||
id.dev = st->st_dev; | while (NULL != (mlink = mpage->mlinks)) { | ||
index = ohash_lookup_memory | mpage->mlinks = mlink->next; | ||
(&inos, (char *)&id, sizeof(id), hash); | mlink_free(mlink); | ||
} | |||
return(NULL != ohash_find(&inos, index)); | free(mpage->sec); | ||
free(mpage->arch); | |||
free(mpage->title); | |||
free(mpage->desc); | |||
free(mpage); | |||
mpage = ohash_next(&mpages, &slot); | |||
} | |||
} | } | ||
/* | /* | ||
* The hashing function used here is quite simple: simply take the inode | * For each mlink to the mpage, check whether the path looks like | ||
* and use uint32_t of its bits. | * it is formatted, and if it does, check whether a source manual | ||
* Then when we do the lookup, use both the inode and device identifier. | * exists by the same name, ignoring the suffix. | ||
* If both conditions hold, drop the mlink. | |||
*/ | */ | ||
static void | static void | ||
inoadd(const struct stat *st, struct of *of) | mlinks_undupe(struct mpage *mpage) | ||
{ | { | ||
uint32_t hash; | char buf[PATH_MAX]; | ||
unsigned int index; | struct mlink **prev; | ||
struct mlink *mlink; | |||
char *bufp; | |||
of->id.ino = hash = st->st_ino; | mpage->form = FORM_CAT; | ||
of->id.dev = st->st_dev; | prev = &mpage->mlinks; | ||
index = ohash_lookup_memory | while (NULL != (mlink = *prev)) { | ||
(&inos, (char *)&of->id, sizeof(of->id), hash); | if (FORM_CAT != mlink->dform) { | ||
mpage->form = FORM_NONE; | |||
assert(NULL == ohash_find(&inos, index)); | goto nextlink; | ||
ohash_insert(&inos, index, of); | } | ||
if (strlcpy(buf, mlink->file, PATH_MAX) >= PATH_MAX) { | |||
if (warnings) | |||
say(mlink->file, "Filename too long"); | |||
goto nextlink; | |||
} | |||
bufp = strstr(buf, "cat"); | |||
assert(NULL != bufp); | |||
memcpy(bufp, "man", 3); | |||
if (NULL != (bufp = strrchr(buf, '.'))) | |||
*++bufp = '\0'; | |||
strlcat(buf, mlink->dsec, PATH_MAX); | |||
if (NULL == ohash_find(&mlinks, | |||
ohash_qlookup(&mlinks, buf))) | |||
goto nextlink; | |||
if (warnings) | |||
say(mlink->file, "Man source exists: %s", buf); | |||
if (use_all) | |||
goto nextlink; | |||
*prev = mlink->next; | |||
mlink_free(mlink); | |||
continue; | |||
nextlink: | |||
prev = &(*prev)->next; | |||
} | |||
} | } | ||
static void | static int | ||
ofadd(const char *base, int dform, const char *file, | mlink_check(struct mpage *mpage, struct mlink *mlink) | ||
const char *name, const char *dsec, const char *sec, | |||
const char *arch, const struct stat *st) | |||
{ | { | ||
struct of *of; | int match; | ||
int sform; | |||
assert(NULL != file); | match = 1; | ||
if (NULL == name) | /* | ||
name = ""; | * Check whether the manual section given in a file | ||
if (NULL == sec) | * agrees with the directory where the file is located. | ||
sec = ""; | * Some manuals have suffixes like (3p) on their | ||
if (NULL == dsec) | * section number either inside the file or in the | ||
dsec = ""; | * directory name, some are linked into more than one | ||
if (NULL == arch) | * section, like encrypt(1) = makekey(8). | ||
arch = ""; | */ | ||
sform = FORM_NONE; | if (FORM_SRC == mpage->form && | ||
if (NULL != sec && *sec <= '9' && *sec >= '1') | strcasecmp(mpage->sec, mlink->dsec)) { | ||
sform = FORM_SRC; | match = 0; | ||
else if (NULL != sec && *sec == '0') { | say(mlink->file, "Section \"%s\" manual in %s directory", | ||
sec = dsec; | mpage->sec, mlink->dsec); | ||
sform = FORM_CAT; | |||
} | } | ||
of = mandoc_calloc(1, sizeof(struct of)); | |||
strlcpy(of->file, file, MAXPATHLEN); | |||
of->name = name; | |||
of->sec = sec; | |||
of->dsec = dsec; | |||
of->arch = arch; | |||
of->sform = sform; | |||
of->dform = dform; | |||
of->next = ofs; | |||
ofs = of; | |||
/* | /* | ||
* Add to unique identifier hash. | * Manual page directories exist for each kernel | ||
* Then if it's a source manual and we're going to use source in | * architecture as returned by machine(1). | ||
* favour of catpages, add it to that hash. | * However, many manuals only depend on the | ||
* application architecture as returned by arch(1). | |||
* For example, some (2/ARM) manuals are shared | |||
* across the "armish" and "zaurus" kernel | |||
* architectures. | |||
* A few manuals are even shared across completely | |||
* different architectures, for example fdformat(1) | |||
* on amd64, i386, sparc, and sparc64. | |||
*/ | */ | ||
inoadd(st, of); | |||
fileadd(of); | |||
} | |||
static void | if (strcasecmp(mpage->arch, mlink->arch)) { | ||
offree(void) | match = 0; | ||
{ | say(mlink->file, "Architecture \"%s\" manual in " | ||
struct of *of; | "\"%s\" directory", mpage->arch, mlink->arch); | ||
while (NULL != (of = ofs)) { | |||
ofs = of->next; | |||
free(of); | |||
} | } | ||
if (strcasecmp(mpage->title, mlink->name)) | |||
match = 0; | |||
return(match); | |||
} | } | ||
/* | /* | ||
* Run through the files in the global vector "ofs" and add them to the | * Run through the files in the global vector "mpages" | ||
* database specified in "base". | * and add them to the database specified in "basedir". | ||
* | * | ||
* This handles the parsing scheme itself, using the cues of directory | * This handles the parsing scheme itself, using the cues of directory | ||
* and filename to determine whether the file is parsable or not. | * and filename to determine whether the file is parsable or not. | ||
*/ | */ | ||
static int | static void | ||
ofmerge(struct mchars *mc, struct mparse *mp, const char *base) | mpages_merge(struct mchars *mc, struct mparse *mp) | ||
{ | { | ||
int form; | char any[] = "any"; | ||
size_t sz; | struct ohash_info str_info; | ||
struct mdoc *mdoc; | int fd[2]; | ||
struct man *man; | struct mpage *mpage, *mpage_dest; | ||
char buf[MAXPATHLEN]; | struct mlink *mlink, *mlink_dest; | ||
char *bufp; | struct mdoc *mdoc; | ||
const char *msec, *march, *mtitle, *cp; | struct man *man; | ||
struct of *of; | char *sodest; | ||
enum mandoclevel lvl; | char *cp; | ||
pid_t child_pid; | |||
int match, status; | |||
unsigned int pslot; | |||
enum mandoclevel lvl; | |||
for (of = ofs; NULL != of; of = of->next) { | str_info.alloc = hash_alloc; | ||
/* | str_info.halloc = hash_halloc; | ||
* If we're a catpage (as defined by our path), then see | str_info.hfree = hash_free; | ||
* if a manpage exists by the same name (ignoring the | str_info.key_offset = offsetof(struct str, key); | ||
* suffix). | |||
* If it does, then we want to use it instead of our | if (0 == nodb) | ||
* own. | SQL_EXEC("BEGIN TRANSACTION"); | ||
*/ | |||
if ( ! use_all && FORM_CAT == of->dform) { | mpage = ohash_first(&mpages, &pslot); | ||
sz = strlcpy(buf, of->file, MAXPATHLEN); | while (NULL != mpage) { | ||
if (sz >= MAXPATHLEN) { | mlinks_undupe(mpage); | ||
WARNING(of->file, base, | if (NULL == mpage->mlinks) { | ||
"Filename too long"); | mpage = ohash_next(&mpages, &pslot); | ||
continue; | continue; | ||
} | |||
bufp = strstr(buf, "cat"); | |||
assert(NULL != bufp); | |||
memcpy(bufp, "man", 3); | |||
if (NULL != (bufp = strrchr(buf, '.'))) | |||
*++bufp = '\0'; | |||
strlcat(buf, of->dsec, MAXPATHLEN); | |||
if (filecheck(buf)) { | |||
WARNING(of->file, base, "Man " | |||
"source exists: %s", buf); | |||
continue; | |||
} | |||
} | } | ||
words = NULL; | ohash_init(&strings, 6, &str_info); | ||
mparse_reset(mp); | mparse_reset(mp); | ||
mdoc = NULL; | mdoc = NULL; | ||
man = NULL; | man = NULL; | ||
form = 0; | sodest = NULL; | ||
msec = of->dsec; | child_pid = 0; | ||
march = of->arch; | fd[0] = -1; | ||
mtitle = of->name; | fd[1] = -1; | ||
if (mpage->mlinks->gzip) { | |||
if (-1 == pipe(fd)) { | |||
exitcode = (int)MANDOCLEVEL_SYSERR; | |||
say(mpage->mlinks->file, "&pipe gunzip"); | |||
goto nextpage; | |||
} | |||
switch (child_pid = fork()) { | |||
case (-1): | |||
exitcode = (int)MANDOCLEVEL_SYSERR; | |||
say(mpage->mlinks->file, "&fork gunzip"); | |||
child_pid = 0; | |||
close(fd[1]); | |||
close(fd[0]); | |||
goto nextpage; | |||
case (0): | |||
close(fd[0]); | |||
if (-1 == dup2(fd[1], STDOUT_FILENO)) { | |||
say(mpage->mlinks->file, | |||
"&dup gunzip"); | |||
exit(1); | |||
} | |||
execlp("gunzip", "gunzip", "-c", | |||
mpage->mlinks->file, NULL); | |||
say(mpage->mlinks->file, "&exec gunzip"); | |||
exit(1); | |||
default: | |||
close(fd[1]); | |||
break; | |||
} | |||
} | |||
/* | /* | ||
* Try interpreting the file as mdoc(7) or man(7) | * Try interpreting the file as mdoc(7) or man(7) | ||
* source code, unless it is already known to be | * source code, unless it is already known to be | ||
* formatted. Fall back to formatted mode. | * formatted. Fall back to formatted mode. | ||
*/ | */ | ||
if (FORM_SRC == of->dform || FORM_SRC == of->sform) { | if (FORM_CAT != mpage->mlinks->dform || | ||
lvl = mparse_readfd(mp, -1, of->file); | FORM_CAT != mpage->mlinks->fform) { | ||
lvl = mparse_readfd(mp, fd[0], mpage->mlinks->file); | |||
if (lvl < MANDOCLEVEL_FATAL) | if (lvl < MANDOCLEVEL_FATAL) | ||
mparse_result(mp, &mdoc, &man); | mparse_result(mp, &mdoc, &man, &sodest); | ||
} | } | ||
if (NULL != mdoc) { | if (NULL != sodest) { | ||
form = 1; | mlink_dest = ohash_find(&mlinks, | ||
msec = mdoc_meta(mdoc)->msec; | ohash_qlookup(&mlinks, sodest)); | ||
march = mdoc_meta(mdoc)->arch; | if (NULL != mlink_dest) { | ||
mtitle = mdoc_meta(mdoc)->title; | |||
} else if (NULL != man) { | |||
form = 1; | |||
msec = man_meta(man)->msec; | |||
march = ""; | |||
mtitle = man_meta(man)->title; | |||
} | |||
if (NULL == msec) | /* The .so target exists. */ | ||
msec = ""; | |||
if (NULL == march) | |||
march = ""; | |||
if (NULL == mtitle) | |||
mtitle = ""; | |||
/* | mpage_dest = mlink_dest->mpage; | ||
* Check whether the manual section given in a file | mlink = mpage->mlinks; | ||
* agrees with the directory where the file is located. | while (1) { | ||
* Some manuals have suffixes like (3p) on their | mlink->mpage = mpage_dest; | ||
* section number either inside the file or in the | |||
* directory name, some are linked into more than one | |||
* section, like encrypt(1) = makekey(8). Do not skip | |||
* manuals for such reasons. | |||
*/ | |||
if ( ! use_all && form && strcasecmp(msec, of->dsec)) | |||
WARNING(of->file, base, "Section \"%s\" " | |||
"manual in %s directory", | |||
msec, of->dsec); | |||
/* | /* | ||
* Manual page directories exist for each kernel | * If the target was already | ||
* architecture as returned by machine(1). | * processed, add the links | ||
* However, many manuals only depend on the | * to the database now. | ||
* application architecture as returned by arch(1). | * Otherwise, this will | ||
* For example, some (2/ARM) manuals are shared | * happen when we come | ||
* across the "armish" and "zaurus" kernel | * to the target. | ||
* architectures. | */ | ||
* A few manuals are even shared across completely | |||
* different architectures, for example fdformat(1) | |||
* on amd64, i386, sparc, and sparc64. | |||
* Thus, warn about architecture mismatches, | |||
* but don't skip manuals for this reason. | |||
*/ | |||
if ( ! use_all && strcasecmp(march, of->arch)) | |||
WARNING(of->file, base, "Architecture \"%s\" " | |||
"manual in \"%s\" directory", | |||
march, of->arch); | |||
putkey(of, of->name, TYPE_Nm); | if (mpage_dest->recno) | ||
dbadd_mlink(mlink); | |||
if (NULL == mlink->next) | |||
break; | |||
mlink = mlink->next; | |||
} | |||
/* Move all links to the target. */ | |||
mlink->next = mlink_dest->next; | |||
mlink_dest->next = mpage->mlinks; | |||
mpage->mlinks = NULL; | |||
} | |||
goto nextpage; | |||
} else if (NULL != mdoc) { | |||
mpage->form = FORM_SRC; | |||
mpage->sec = | |||
mandoc_strdup(mdoc_meta(mdoc)->msec); | |||
mpage->arch = mdoc_meta(mdoc)->arch; | |||
mpage->arch = mandoc_strdup( | |||
NULL == mpage->arch ? "" : mpage->arch); | |||
mpage->title = | |||
mandoc_strdup(mdoc_meta(mdoc)->title); | |||
} else if (NULL != man) { | |||
mpage->form = FORM_SRC; | |||
mpage->sec = | |||
mandoc_strdup(man_meta(man)->msec); | |||
mpage->arch = | |||
mandoc_strdup(mpage->mlinks->arch); | |||
mpage->title = | |||
mandoc_strdup(man_meta(man)->title); | |||
} else { | |||
mpage->form = FORM_CAT; | |||
mpage->sec = | |||
mandoc_strdup(mpage->mlinks->dsec); | |||
mpage->arch = | |||
mandoc_strdup(mpage->mlinks->arch); | |||
mpage->title = | |||
mandoc_strdup(mpage->mlinks->name); | |||
} | |||
putkey(mpage, mpage->sec, TYPE_sec); | |||
putkey(mpage, '\0' == *mpage->arch ? | |||
any : mpage->arch, TYPE_arch); | |||
for (mlink = mpage->mlinks; mlink; mlink = mlink->next) { | |||
if ('\0' != *mlink->dsec) | |||
putkey(mpage, mlink->dsec, TYPE_sec); | |||
if ('\0' != *mlink->fsec) | |||
putkey(mpage, mlink->fsec, TYPE_sec); | |||
putkey(mpage, '\0' == *mlink->arch ? | |||
any : mlink->arch, TYPE_arch); | |||
putkey(mpage, mlink->name, TYPE_Nm); | |||
} | |||
if (warnings && !use_all) { | |||
match = 0; | |||
for (mlink = mpage->mlinks; mlink; | |||
mlink = mlink->next) | |||
if (mlink_check(mpage, mlink)) | |||
match = 1; | |||
} else | |||
match = 1; | |||
if (NULL != mdoc) { | if (NULL != mdoc) { | ||
if (NULL != (cp = mdoc_meta(mdoc)->name)) | if (NULL != (cp = mdoc_meta(mdoc)->name)) | ||
putkey(of, cp, TYPE_Nm); | putkey(mpage, cp, TYPE_Nm); | ||
parse_mdoc(of, mdoc_node(mdoc)); | assert(NULL == mpage->desc); | ||
parse_mdoc(mpage, mdoc_node(mdoc)); | |||
putkey(mpage, NULL != mpage->desc ? | |||
mpage->desc : mpage->mlinks->name, TYPE_Nd); | |||
} else if (NULL != man) | } else if (NULL != man) | ||
parse_man(of, man_node(man)); | parse_man(mpage, man_node(man)); | ||
else | else | ||
parse_catpage(of, base); | parse_cat(mpage, fd[0]); | ||
dbindex(mc, form, of, base); | dbadd(mpage, mc); | ||
nextpage: | |||
if (child_pid) { | |||
if (-1 == waitpid(child_pid, &status, 0)) { | |||
exitcode = (int)MANDOCLEVEL_SYSERR; | |||
say(mpage->mlinks->file, "&wait gunzip"); | |||
} else if (WIFSIGNALED(status)) { | |||
exitcode = (int)MANDOCLEVEL_SYSERR; | |||
say(mpage->mlinks->file, | |||
"gunzip died from signal %d", | |||
WTERMSIG(status)); | |||
} else if (WEXITSTATUS(status)) { | |||
exitcode = (int)MANDOCLEVEL_SYSERR; | |||
say(mpage->mlinks->file, | |||
"gunzip failed with code %d", | |||
WEXITSTATUS(status)); | |||
} | |||
} | |||
ohash_delete(&strings); | |||
mpage = ohash_next(&mpages, &pslot); | |||
} | } | ||
return(1); | if (0 == nodb) | ||
SQL_EXEC("END TRANSACTION"); | |||
} | } | ||
static void | static void | ||
parse_catpage(struct of *of, const char *base) | parse_cat(struct mpage *mpage, int fd) | ||
{ | { | ||
FILE *stream; | FILE *stream; | ||
char *line, *p, *title; | char *line, *p, *title; | ||
size_t len, plen, titlesz; | size_t len, plen, titlesz; | ||
if (NULL == (stream = fopen(of->file, "r"))) { | stream = (-1 == fd) ? | ||
WARNING(of->file, base, "%s", strerror(errno)); | fopen(mpage->mlinks->file, "r") : | ||
fdopen(fd, "r"); | |||
if (NULL == stream) { | |||
if (warnings) | |||
say(mpage->mlinks->file, "&fopen"); | |||
return; | return; | ||
} | } | ||
|
|
||
*/ | */ | ||
if (NULL == title || '\0' == *title) { | if (NULL == title || '\0' == *title) { | ||
WARNING(of->file, base, "Cannot find NAME section"); | if (warnings) | ||
say(mpage->mlinks->file, | |||
"Cannot find NAME section"); | |||
assert(NULL == mpage->desc); | |||
mpage->desc = mandoc_strdup(mpage->mlinks->name); | |||
putkey(mpage, mpage->mlinks->name, TYPE_Nd); | |||
fclose(stream); | fclose(stream); | ||
free(title); | free(title); | ||
return; | return; | ||
|
|
||
for (p += 2; ' ' == *p || '\b' == *p; p++) | for (p += 2; ' ' == *p || '\b' == *p; p++) | ||
/* Skip to next word. */ ; | /* Skip to next word. */ ; | ||
} else { | } else { | ||
WARNING(of->file, base, "No dash in title line"); | if (warnings) | ||
say(mpage->mlinks->file, | |||
"No dash in title line"); | |||
p = title; | p = title; | ||
} | } | ||
|
|
||
plen -= 2; | plen -= 2; | ||
} | } | ||
of->desc = stradd(p); | assert(NULL == mpage->desc); | ||
putkey(of, p, TYPE_Nd); | mpage->desc = mandoc_strdup(p); | ||
putkey(mpage, mpage->desc, TYPE_Nd); | |||
fclose(stream); | fclose(stream); | ||
free(title); | free(title); | ||
} | } | ||
|
|
||
* Put a type/word pair into the word database for this particular file. | * Put a type/word pair into the word database for this particular file. | ||
*/ | */ | ||
static void | static void | ||
putkey(const struct of *of, const char *value, uint64_t type) | putkey(const struct mpage *mpage, char *value, uint64_t type) | ||
{ | { | ||
char *cp; | |||
assert(NULL != value); | assert(NULL != value); | ||
wordaddbuf(of, value, strlen(value), type); | if (TYPE_arch == type) | ||
for (cp = value; *cp; cp++) | |||
if (isupper((unsigned char)*cp)) | |||
*cp = _tolower((unsigned char)*cp); | |||
putkeys(mpage, value, strlen(value), type); | |||
} | } | ||
/* | /* | ||
* Like putkey() but for unterminated strings. | |||
*/ | |||
static void | |||
putkeys(const struct of *of, const char *value, int sz, uint64_t type) | |||
{ | |||
wordaddbuf(of, value, sz, type); | |||
} | |||
/* | |||
* Grok all nodes at or below a certain mdoc node into putkey(). | * Grok all nodes at or below a certain mdoc node into putkey(). | ||
*/ | */ | ||
static void | static void | ||
putmdockey(const struct of *of, const struct mdoc_node *n, uint64_t m) | putmdockey(const struct mpage *mpage, | ||
const struct mdoc_node *n, uint64_t m) | |||
{ | { | ||
for ( ; NULL != n; n = n->next) { | for ( ; NULL != n; n = n->next) { | ||
if (NULL != n->child) | if (NULL != n->child) | ||
putmdockey(of, n->child, m); | putmdockey(mpage, n->child, m); | ||
if (MDOC_TEXT == n->type) | if (MDOC_TEXT == n->type) | ||
putkey(of, n->string, m); | putkey(mpage, n->string, m); | ||
} | } | ||
} | } | ||
static int | static void | ||
parse_man(struct of *of, const struct man_node *n) | parse_man(struct mpage *mpage, const struct man_node *n) | ||
{ | { | ||
const struct man_node *head, *body; | const struct man_node *head, *body; | ||
char *start, *sv, *title; | char *start, *title; | ||
char byte; | char byte; | ||
size_t sz, titlesz; | size_t sz; | ||
if (NULL == n) | if (NULL == n) | ||
return(0); | return; | ||
/* | /* | ||
* We're only searching for one thing: the first text child in | * We're only searching for one thing: the first text child in | ||
|
|
||
NULL != (head = (head->child)) && | NULL != (head = (head->child)) && | ||
MAN_TEXT == head->type && | MAN_TEXT == head->type && | ||
0 == strcmp(head->string, "NAME") && | 0 == strcmp(head->string, "NAME") && | ||
NULL != (body = body->child) && | NULL != body->child) { | ||
MAN_TEXT == body->type) { | |||
title = NULL; | |||
titlesz = 0; | |||
/* | /* | ||
* Suck the entire NAME section into memory. | * Suck the entire NAME section into memory. | ||
* Yes, we might run away. | * Yes, we might run away. | ||
|
|
||
* NAME sections over many lines. | * NAME sections over many lines. | ||
*/ | */ | ||
for ( ; NULL != body; body = body->next) { | title = NULL; | ||
if (MAN_TEXT != body->type) | man_deroff(&title, body); | ||
break; | |||
if (0 == (sz = strlen(body->string))) | |||
continue; | |||
title = mandoc_realloc | |||
(title, titlesz + sz + 1); | |||
memcpy(title + titlesz, body->string, sz); | |||
titlesz += sz + 1; | |||
title[titlesz - 1] = ' '; | |||
} | |||
if (NULL == title) | if (NULL == title) | ||
return(1); | return; | ||
title = mandoc_realloc(title, titlesz + 1); | |||
title[titlesz] = '\0'; | |||
/* Skip leading space. */ | |||
sv = title; | |||
while (isspace((unsigned char)*sv)) | |||
sv++; | |||
if (0 == (sz = strlen(sv))) { | |||
free(title); | |||
return(1); | |||
} | |||
/* Erase trailing space. */ | |||
start = &sv[sz - 1]; | |||
while (start > sv && isspace((unsigned char)*start)) | |||
*start-- = '\0'; | |||
if (start == sv) { | |||
free(title); | |||
return(1); | |||
} | |||
start = sv; | |||
/* | /* | ||
* Go through a special heuristic dance here. | * Go through a special heuristic dance here. | ||
* Conventionally, one or more manual names are | * Conventionally, one or more manual names are | ||
|
|
||
* the name parts here. | * the name parts here. | ||
*/ | */ | ||
start = title; | |||
for ( ;; ) { | for ( ;; ) { | ||
sz = strcspn(start, " ,"); | sz = strcspn(start, " ,"); | ||
if ('\0' == start[sz]) | if ('\0' == start[sz]) | ||
|
|
||
byte = start[sz]; | byte = start[sz]; | ||
start[sz] = '\0'; | start[sz] = '\0'; | ||
putkey(of, start, TYPE_Nm); | /* | ||
* Assume a stray trailing comma in the | |||
* name list if a name begins with a dash. | |||
*/ | |||
if ('-' == start[0] || | |||
('\\' == start[0] && '-' == start[1])) | |||
break; | |||
putkey(mpage, start, TYPE_Nm); | |||
if (' ' == byte) { | if (' ' == byte) { | ||
start += sz + 1; | start += sz + 1; | ||
break; | break; | ||
|
|
||
start++; | start++; | ||
} | } | ||
if (sv == start) { | if (start == title) { | ||
putkey(of, start, TYPE_Nm); | putkey(mpage, start, TYPE_Nm); | ||
free(title); | free(title); | ||
return(1); | return; | ||
} | } | ||
while (isspace((unsigned char)*start)) | while (isspace((unsigned char)*start)) | ||
|
|
||
while (' ' == *start) | while (' ' == *start) | ||
start++; | start++; | ||
assert(NULL == of->desc); | assert(NULL == mpage->desc); | ||
of->desc = stradd(start); | mpage->desc = mandoc_strdup(start); | ||
putkey(of, start, TYPE_Nd); | putkey(mpage, mpage->desc, TYPE_Nd); | ||
free(title); | free(title); | ||
return(1); | return; | ||
} | } | ||
} | } | ||
for (n = n->child; n; n = n->next) | for (n = n->child; n; n = n->next) { | ||
if (parse_man(of, n)) | if (NULL != mpage->desc) | ||
return(1); | break; | ||
parse_man(mpage, n); | |||
return(0); | } | ||
} | } | ||
static void | static void | ||
parse_mdoc(struct of *of, const struct mdoc_node *n) | parse_mdoc(struct mpage *mpage, const struct mdoc_node *n) | ||
{ | { | ||
assert(NULL != n); | assert(NULL != n); | ||
|
|
||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||
case (MDOC_TAIL): | case (MDOC_TAIL): | ||
if (NULL != mdocs[n->tok].fp) | if (NULL != mdocs[n->tok].fp) | ||
if (0 == (*mdocs[n->tok].fp)(of, n)) | if (0 == (*mdocs[n->tok].fp)(mpage, n)) | ||
break; | break; | ||
if (mdocs[n->tok].mask) | |||
if (MDOCF_CHILD & mdocs[n->tok].flags) | putmdockey(mpage, n->child, | ||
putmdockey(of, n->child, mdocs[n->tok].mask); | mdocs[n->tok].mask); | ||
break; | break; | ||
default: | default: | ||
assert(MDOC_ROOT != n->type); | assert(MDOC_ROOT != n->type); | ||
continue; | continue; | ||
} | } | ||
if (NULL != n->child) | if (NULL != n->child) | ||
parse_mdoc(of, n); | parse_mdoc(mpage, n); | ||
} | } | ||
} | } | ||
static int | static int | ||
parse_mdoc_Fd(struct of *of, const struct mdoc_node *n) | parse_mdoc_Fd(struct mpage *mpage, const struct mdoc_node *n) | ||
{ | { | ||
const char *start, *end; | const char *start, *end; | ||
size_t sz; | size_t sz; | ||
|
|
||
end--; | end--; | ||
if (end > start) | if (end > start) | ||
putkeys(of, start, end - start + 1, TYPE_In); | putkeys(mpage, start, end - start + 1, TYPE_In); | ||
return(1); | return(0); | ||
} | } | ||
static int | static int | ||
parse_mdoc_In(struct of *of, const struct mdoc_node *n) | parse_mdoc_Fn(struct mpage *mpage, const struct mdoc_node *n) | ||
{ | { | ||
char *cp; | |||
if (NULL != n->child && MDOC_TEXT == n->child->type) | |||
return(0); | |||
putkey(of, n->child->string, TYPE_In); | |||
return(1); | |||
} | |||
static int | |||
parse_mdoc_Fn(struct of *of, const struct mdoc_node *n) | |||
{ | |||
const char *cp; | |||
if (NULL == (n = n->child) || MDOC_TEXT != n->type) | if (NULL == (n = n->child) || MDOC_TEXT != n->type) | ||
return(0); | return(0); | ||
|
|
||
while ('*' == *cp) | while ('*' == *cp) | ||
cp++; | cp++; | ||
putkey(of, cp, TYPE_Fn); | putkey(mpage, cp, TYPE_Fn); | ||
if (n->string < cp) | if (n->string < cp) | ||
putkeys(of, n->string, cp - n->string, TYPE_Ft); | putkeys(mpage, n->string, cp - n->string, TYPE_Ft); | ||
for (n = n->next; NULL != n; n = n->next) | for (n = n->next; NULL != n; n = n->next) | ||
if (MDOC_TEXT == n->type) | if (MDOC_TEXT == n->type) | ||
putkey(of, n->string, TYPE_Fa); | putkey(mpage, n->string, TYPE_Fa); | ||
return(0); | return(0); | ||
} | } | ||
static int | static int | ||
parse_mdoc_St(struct of *of, const struct mdoc_node *n) | parse_mdoc_Xr(struct mpage *mpage, const struct mdoc_node *n) | ||
{ | { | ||
char *cp; | |||
if (NULL == n->child || MDOC_TEXT != n->child->type) | if (NULL == (n = n->child)) | ||
return(0); | return(0); | ||
putkey(of, n->child->string, TYPE_St); | if (NULL == n->next) { | ||
return(1); | putkey(mpage, n->string, TYPE_Xr); | ||
} | |||
static int | |||
parse_mdoc_Xr(struct of *of, const struct mdoc_node *n) | |||
{ | |||
if (NULL == (n = n->child)) | |||
return(0); | return(0); | ||
} | |||
putkey(of, n->string, TYPE_Xr); | mandoc_asprintf(&cp, "%s(%s)", n->string, n->next->string); | ||
return(1); | putkey(mpage, cp, TYPE_Xr); | ||
free(cp); | |||
return(0); | |||
} | } | ||
static int | static int | ||
parse_mdoc_Nd(struct of *of, const struct mdoc_node *n) | parse_mdoc_Nd(struct mpage *mpage, const struct mdoc_node *n) | ||
{ | { | ||
size_t sz; | |||
char *sv, *desc; | |||
if (MDOC_BODY != n->type) | if (MDOC_BODY == n->type) | ||
return(0); | mdoc_deroff(&mpage->desc, n); | ||
return(0); | |||
/* | |||
* Special-case the `Nd' because we need to put the description | |||
* into the document table. | |||
*/ | |||
desc = NULL; | |||
for (n = n->child; NULL != n; n = n->next) { | |||
if (MDOC_TEXT == n->type) { | |||
sz = strlen(n->string) + 1; | |||
if (NULL != (sv = desc)) | |||
sz += strlen(desc) + 1; | |||
desc = mandoc_realloc(desc, sz); | |||
if (NULL != sv) | |||
strlcat(desc, " ", sz); | |||
else | |||
*desc = '\0'; | |||
strlcat(desc, n->string, sz); | |||
} | |||
if (NULL != n->child) | |||
parse_mdoc_Nd(of, n); | |||
} | |||
of->desc = NULL != desc ? stradd(desc) : NULL; | |||
free(desc); | |||
return(1); | |||
} | } | ||
static int | static int | ||
parse_mdoc_Nm(struct of *of, const struct mdoc_node *n) | parse_mdoc_Nm(struct mpage *mpage, const struct mdoc_node *n) | ||
{ | { | ||
if (SEC_NAME == n->sec) | return(SEC_NAME == n->sec || | ||
return(1); | (SEC_SYNOPSIS == n->sec && MDOC_HEAD == n->type)); | ||
else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) | |||
return(0); | |||
return(1); | |||
} | } | ||
static int | static int | ||
parse_mdoc_Sh(struct of *of, const struct mdoc_node *n) | parse_mdoc_Sh(struct mpage *mpage, const struct mdoc_node *n) | ||
{ | { | ||
return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type); | return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type); | ||
} | } | ||
static int | static int | ||
parse_mdoc_head(struct of *of, const struct mdoc_node *n) | parse_mdoc_head(struct mpage *mpage, const struct mdoc_node *n) | ||
{ | { | ||
return(MDOC_HEAD == n->type); | return(MDOC_HEAD == n->type); | ||
} | } | ||
static int | static int | ||
parse_mdoc_body(struct of *of, const struct mdoc_node *n) | parse_mdoc_body(struct mpage *mpage, const struct mdoc_node *n) | ||
{ | { | ||
return(MDOC_BODY == n->type); | return(MDOC_BODY == n->type); | ||
} | } | ||
/* | /* | ||
* See straddbuf(). | * Add a string to the hash table for the current manual. | ||
* Each string has a bitmask telling which macros it belongs to. | |||
* When we finish the manual, we'll dump the table. | |||
*/ | */ | ||
static char * | |||
stradd(const char *cp) | |||
{ | |||
return(straddbuf(cp, strlen(cp))); | |||
} | |||
/* | |||
* This looks up or adds a string to the string table. | |||
* The string table is a table of all strings encountered during parse | |||
* or file scan. | |||
* In using it, we avoid having thousands of (e.g.) "cat1" string | |||
* allocations for the "of" table. | |||
* We also have a layer atop the string table for keeping track of words | |||
* in a parse sequence (see wordaddbuf()). | |||
*/ | |||
static char * | |||
straddbuf(const char *cp, size_t sz) | |||
{ | |||
struct str *s; | |||
unsigned int index; | |||
const char *end; | |||
if (NULL != (s = hashget(cp, sz))) | |||
return(s->key); | |||
s = mandoc_calloc(sizeof(struct str) + sz + 1, 1); | |||
memcpy(s->key, cp, sz); | |||
end = cp + sz; | |||
index = ohash_qlookupi(&strings, cp, &end); | |||
assert(NULL == ohash_find(&strings, index)); | |||
ohash_insert(&strings, index, s); | |||
return(s->key); | |||
} | |||
static struct str * | |||
hashget(const char *cp, size_t sz) | |||
{ | |||
unsigned int index; | |||
const char *end; | |||
end = cp + sz; | |||
index = ohash_qlookupi(&strings, cp, &end); | |||
return(ohash_find(&strings, index)); | |||
} | |||
/* | |||
* Add a word to the current parse sequence. | |||
* Within the hashtable of strings, we maintain a list of strings that | |||
* are currently indexed. | |||
* Each of these ("words") has a bitmask modified within the parse. | |||
* When we finish a parse, we'll dump the list, then remove the head | |||
* entry -- since the next parse will have a new "of", it can keep track | |||
* of its entries without conflict. | |||
*/ | |||
static void | static void | ||
wordaddbuf(const struct of *of, | putkeys(const struct mpage *mpage, | ||
const char *cp, size_t sz, uint64_t v) | const char *cp, size_t sz, uint64_t v) | ||
{ | { | ||
struct str *s; | struct str *s; | ||
unsigned int index; | |||
const char *end; | const char *end; | ||
uint64_t mask; | |||
unsigned int slot; | |||
int i; | |||
if (0 == sz) | if (0 == sz) | ||
return; | return; | ||
s = hashget(cp, sz); | if (debug > 1) { | ||
for (i = 0, mask = 1; | |||
i < mansearch_keymax; | |||
i++, mask <<= 1) | |||
if (mask & v) | |||
break; | |||
say(mpage->mlinks->file, "Adding key %s=%*s", | |||
mansearch_keynames[i], sz, cp); | |||
} | |||
if (NULL != s && of == s->of) { | end = cp + sz; | ||
slot = ohash_qlookupi(&strings, cp, &end); | |||
s = ohash_find(&strings, slot); | |||
if (NULL != s && mpage == s->mpage) { | |||
s->mask |= v; | s->mask |= v; | ||
return; | return; | ||
} else if (NULL == s) { | } else if (NULL == s) { | ||
s = mandoc_calloc(sizeof(struct str) + sz + 1, 1); | s = mandoc_calloc(sizeof(struct str) + sz + 1, 1); | ||
memcpy(s->key, cp, sz); | memcpy(s->key, cp, sz); | ||
end = cp + sz; | ohash_insert(&strings, slot, s); | ||
index = ohash_qlookupi(&strings, cp, &end); | |||
assert(NULL == ohash_find(&strings, index)); | |||
ohash_insert(&strings, index, s); | |||
} | } | ||
s->mpage = mpage; | |||
s->next = words; | |||
s->of = of; | |||
s->mask = v; | s->mask = v; | ||
words = s; | |||
} | } | ||
/* | /* | ||
|
|
||
} | } | ||
/* | /* | ||
* Store the UTF-8 version of a key, or alias the pointer if the key has | * Store the rendered version of a key, or alias the pointer | ||
* no UTF-8 transcription marks in it. | * if the key contains no escape sequences. | ||
*/ | */ | ||
static void | static void | ||
utf8key(struct mchars *mc, struct str *key) | render_key(struct mchars *mc, struct str *key) | ||
{ | { | ||
size_t sz, bsz, pos; | size_t sz, bsz, pos; | ||
char utfbuf[7], res[5]; | char utfbuf[7], res[6]; | ||
char *buf; | char *buf; | ||
const char *seq, *cpp, *val; | const char *seq, *cpp, *val; | ||
int len, u; | int len, u; | ||
enum mandoc_esc esc; | enum mandoc_esc esc; | ||
assert(NULL == key->utf8); | assert(NULL == key->rendered); | ||
res[0] = '\\'; | res[0] = '\\'; | ||
res[1] = '\t'; | res[1] = '\t'; | ||
res[2] = ASCII_NBRSP; | res[2] = ASCII_NBRSP; | ||
res[3] = ASCII_HYPH; | res[3] = ASCII_HYPH; | ||
res[4] = '\0'; | res[4] = ASCII_BREAK; | ||
res[5] = '\0'; | |||
val = key->key; | val = key->key; | ||
bsz = strlen(val); | bsz = strlen(val); | ||
|
|
||
* pointer as ourselvse and get out of here. | * pointer as ourselvse and get out of here. | ||
*/ | */ | ||
if (strcspn(val, res) == bsz) { | if (strcspn(val, res) == bsz) { | ||
key->utf8 = key->key; | key->rendered = key->key; | ||
return; | return; | ||
} | } | ||
|
|
||
val += sz; | val += sz; | ||
} | } | ||
if (ASCII_HYPH == *val) { | switch (*val) { | ||
case (ASCII_HYPH): | |||
buf[pos++] = '-'; | buf[pos++] = '-'; | ||
val++; | val++; | ||
continue; | continue; | ||
} else if ('\t' == *val || ASCII_NBRSP == *val) { | case ('\t'): | ||
/* FALLTHROUGH */ | |||
case (ASCII_NBRSP): | |||
buf[pos++] = ' '; | buf[pos++] = ' '; | ||
val++; | val++; | ||
/* FALLTHROUGH */ | |||
case (ASCII_BREAK): | |||
continue; | continue; | ||
} else if ('\\' != *val) | default: | ||
break; | break; | ||
} | |||
if ('\\' != *val) | |||
break; | |||
/* Read past the slash. */ | /* Read past the slash. */ | ||
val++; | val++; | ||
u = 0; | |||
/* | /* | ||
* Parse the escape sequence and see if it's a | * Parse the escape sequence and see if it's a | ||
* predefined character or special character. | * predefined character or special character. | ||
*/ | */ | ||
esc = mandoc_escape | esc = mandoc_escape | ||
((const char **)&val, &seq, &len); | ((const char **)&val, &seq, &len); | ||
if (ESCAPE_ERROR == esc) | if (ESCAPE_ERROR == esc) | ||
break; | break; | ||
if (ESCAPE_SPECIAL != esc) | if (ESCAPE_SPECIAL != esc) | ||
continue; | continue; | ||
if (0 == (u = mchars_spec2cp(mc, seq, len))) | |||
continue; | |||
/* | /* | ||
* If we have a Unicode codepoint, try to convert that | * Render the special character | ||
* to a UTF-8 byte string. | * as either UTF-8 or ASCII. | ||
*/ | */ | ||
cpp = utfbuf; | |||
if (0 == (sz = utf8(u, utfbuf))) | |||
continue; | |||
if (write_utf8) { | |||
if (0 == (u = mchars_spec2cp(mc, seq, len))) | |||
continue; | |||
cpp = utfbuf; | |||
if (0 == (sz = utf8(u, utfbuf))) | |||
continue; | |||
sz = strlen(cpp); | |||
} else { | |||
cpp = mchars_spec2str(mc, seq, len, &sz); | |||
if (NULL == cpp) | |||
continue; | |||
if (ASCII_NBRSP == *cpp) { | |||
cpp = " "; | |||
sz = 1; | |||
} | |||
} | |||
/* Copy the rendered glyph into the stream. */ | /* Copy the rendered glyph into the stream. */ | ||
sz = strlen(cpp); | |||
bsz += sz; | bsz += sz; | ||
buf = mandoc_realloc(buf, bsz); | buf = mandoc_realloc(buf, bsz); | ||
memcpy(&buf[pos], cpp, sz); | memcpy(&buf[pos], cpp, sz); | ||
pos += sz; | pos += sz; | ||
} | } | ||
buf[pos] = '\0'; | buf[pos] = '\0'; | ||
key->utf8 = buf; | key->rendered = buf; | ||
} | } | ||
static void | |||
dbadd_mlink(const struct mlink *mlink) | |||
{ | |||
size_t i; | |||
i = 1; | |||
SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->dsec); | |||
SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->arch); | |||
SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->name); | |||
SQL_BIND_INT64(stmts[STMT_INSERT_LINK], i, mlink->mpage->recno); | |||
SQL_STEP(stmts[STMT_INSERT_LINK]); | |||
sqlite3_reset(stmts[STMT_INSERT_LINK]); | |||
} | |||
/* | /* | ||
* Flush the current page's terms (and their bits) into the database. | * Flush the current page's terms (and their bits) into the database. | ||
* Wrap the entire set of additions in a transaction to make sqlite be a | * Wrap the entire set of additions in a transaction to make sqlite be a | ||
* little faster. | * little faster. | ||
* Also, UTF-8-encode the description at the last possible moment. | * Also, handle escape sequences at the last possible moment. | ||
*/ | */ | ||
static void | static void | ||
dbindex(struct mchars *mc, int form, | dbadd(struct mpage *mpage, struct mchars *mc) | ||
const struct of *of, const char *base) | |||
{ | { | ||
struct mlink *mlink; | |||
struct str *key; | struct str *key; | ||
const char *desc; | |||
int64_t recno; | |||
size_t i; | size_t i; | ||
unsigned int slot; | |||
DEBUG(of->file, base, "Adding to index"); | if (debug) | ||
say(mpage->mlinks->file, "Adding to database"); | |||
if (nodb) | if (nodb) | ||
return; | return; | ||
desc = ""; | |||
if (NULL != of->desc) { | |||
key = hashget(of->desc, strlen(of->desc)); | |||
assert(NULL != key); | |||
if (NULL == key->utf8) | |||
utf8key(mc, key); | |||
desc = key->utf8; | |||
} | |||
SQL_EXEC("BEGIN TRANSACTION"); | |||
i = 1; | i = 1; | ||
SQL_BIND_TEXT(stmts[STMT_INSERT_DOC], i, of->file); | SQL_BIND_INT(stmts[STMT_INSERT_PAGE], i, FORM_SRC == mpage->form); | ||
SQL_BIND_TEXT(stmts[STMT_INSERT_DOC], i, of->sec); | SQL_STEP(stmts[STMT_INSERT_PAGE]); | ||
SQL_BIND_TEXT(stmts[STMT_INSERT_DOC], i, of->arch); | mpage->recno = sqlite3_last_insert_rowid(db); | ||
SQL_BIND_TEXT(stmts[STMT_INSERT_DOC], i, desc); | sqlite3_reset(stmts[STMT_INSERT_PAGE]); | ||
SQL_BIND_INT(stmts[STMT_INSERT_DOC], i, form); | |||
SQL_STEP(stmts[STMT_INSERT_DOC]); | |||
recno = sqlite3_last_insert_rowid(db); | |||
sqlite3_reset(stmts[STMT_INSERT_DOC]); | |||
for (key = words; NULL != key; key = key->next) { | for (mlink = mpage->mlinks; mlink; mlink = mlink->next) | ||
assert(key->of == of); | dbadd_mlink(mlink); | ||
if (NULL == key->utf8) | |||
utf8key(mc, key); | for (key = ohash_first(&strings, &slot); NULL != key; | ||
key = ohash_next(&strings, &slot)) { | |||
assert(key->mpage == mpage); | |||
if (NULL == key->rendered) | |||
render_key(mc, key); | |||
i = 1; | i = 1; | ||
SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, key->mask); | SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, key->mask); | ||
SQL_BIND_TEXT(stmts[STMT_INSERT_KEY], i, key->utf8); | SQL_BIND_TEXT(stmts[STMT_INSERT_KEY], i, key->rendered); | ||
SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, recno); | SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, mpage->recno); | ||
SQL_STEP(stmts[STMT_INSERT_KEY]); | SQL_STEP(stmts[STMT_INSERT_KEY]); | ||
sqlite3_reset(stmts[STMT_INSERT_KEY]); | sqlite3_reset(stmts[STMT_INSERT_KEY]); | ||
if (key->rendered != key->key) | |||
free(key->rendered); | |||
free(key); | |||
} | } | ||
SQL_EXEC("END TRANSACTION"); | |||
} | } | ||
static void | static void | ||
dbprune(const char *base) | dbprune(void) | ||
{ | { | ||
struct of *of; | struct mpage *mpage; | ||
struct mlink *mlink; | |||
size_t i; | size_t i; | ||
unsigned int slot; | |||
if (nodb) | if (0 == nodb) | ||
return; | SQL_EXEC("BEGIN TRANSACTION"); | ||
for (of = ofs; NULL != of; of = of->next) { | for (mpage = ohash_first(&mpages, &slot); NULL != mpage; | ||
i = 1; | mpage = ohash_next(&mpages, &slot)) { | ||
SQL_BIND_TEXT(stmts[STMT_DELETE], i, of->file); | mlink = mpage->mlinks; | ||
SQL_STEP(stmts[STMT_DELETE]); | if (debug) | ||
sqlite3_reset(stmts[STMT_DELETE]); | say(mlink->file, "Deleting from database"); | ||
DEBUG(of->file, base, "Deleted from index"); | if (nodb) | ||
continue; | |||
for ( ; NULL != mlink; mlink = mlink->next) { | |||
i = 1; | |||
SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE], | |||
i, mlink->dsec); | |||
SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE], | |||
i, mlink->arch); | |||
SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE], | |||
i, mlink->name); | |||
SQL_STEP(stmts[STMT_DELETE_PAGE]); | |||
sqlite3_reset(stmts[STMT_DELETE_PAGE]); | |||
} | |||
} | } | ||
if (0 == nodb) | |||
SQL_EXEC("END TRANSACTION"); | |||
} | } | ||
/* | /* | ||
|
|
||
* If "real" is not set, rename the temporary file into the real one. | * If "real" is not set, rename the temporary file into the real one. | ||
*/ | */ | ||
static void | static void | ||
dbclose(const char *base, int real) | dbclose(int real) | ||
{ | { | ||
size_t i; | size_t i; | ||
char file[MAXPATHLEN]; | int status; | ||
pid_t child; | |||
if (nodb) | if (nodb) | ||
return; | return; | ||
|
|
||
if (real) | if (real) | ||
return; | return; | ||
strlcpy(file, MANDOC_DB, MAXPATHLEN); | if ('\0' == *tempfilename) { | ||
strlcat(file, "~", MAXPATHLEN); | if (-1 == rename(MANDOC_DB "~", MANDOC_DB)) { | ||
if (-1 == rename(file, MANDOC_DB)) | exitcode = (int)MANDOCLEVEL_SYSERR; | ||
perror(MANDOC_DB); | say(MANDOC_DB, "&rename"); | ||
} | |||
return; | |||
} | |||
switch (child = fork()) { | |||
case (-1): | |||
exitcode = (int)MANDOCLEVEL_SYSERR; | |||
say("", "&fork cmp"); | |||
return; | |||
case (0): | |||
execlp("cmp", "cmp", "-s", | |||
tempfilename, MANDOC_DB, NULL); | |||
say("", "&exec cmp"); | |||
exit(0); | |||
default: | |||
break; | |||
} | |||
if (-1 == waitpid(child, &status, 0)) { | |||
exitcode = (int)MANDOCLEVEL_SYSERR; | |||
say("", "&wait cmp"); | |||
} else if (WIFSIGNALED(status)) { | |||
exitcode = (int)MANDOCLEVEL_SYSERR; | |||
say("", "cmp died from signal %d", WTERMSIG(status)); | |||
} else if (WEXITSTATUS(status)) { | |||
exitcode = (int)MANDOCLEVEL_SYSERR; | |||
say(MANDOC_DB, | |||
"Data changed, but cannot replace database"); | |||
} | |||
*strrchr(tempfilename, '/') = '\0'; | |||
switch (child = fork()) { | |||
case (-1): | |||
exitcode = (int)MANDOCLEVEL_SYSERR; | |||
say("", "&fork rm"); | |||
return; | |||
case (0): | |||
execlp("rm", "rm", "-rf", tempfilename, NULL); | |||
say("", "&exec rm"); | |||
exit((int)MANDOCLEVEL_SYSERR); | |||
default: | |||
break; | |||
} | |||
if (-1 == waitpid(child, &status, 0)) { | |||
exitcode = (int)MANDOCLEVEL_SYSERR; | |||
say("", "&wait rm"); | |||
} else if (WIFSIGNALED(status) || WEXITSTATUS(status)) { | |||
exitcode = (int)MANDOCLEVEL_SYSERR; | |||
say("", "%s: Cannot remove temporary directory", | |||
tempfilename); | |||
} | |||
} | } | ||
/* | /* | ||
|
|
||
* Must be matched by dbclose(). | * Must be matched by dbclose(). | ||
*/ | */ | ||
static int | static int | ||
dbopen(const char *base, int real) | dbopen(int real) | ||
{ | { | ||
char file[MAXPATHLEN]; | |||
const char *sql; | const char *sql; | ||
int rc, ofl; | int rc, ofl; | ||
size_t sz; | |||
if (nodb) | if (nodb) | ||
return(1); | return(1); | ||
sz = strlcpy(file, MANDOC_DB, MAXPATHLEN); | *tempfilename = '\0'; | ||
if ( ! real) | ofl = SQLITE_OPEN_READWRITE; | ||
sz = strlcat(file, "~", MAXPATHLEN); | |||
if (sz >= MAXPATHLEN) { | if (real) { | ||
fprintf(stderr, "%s: Path too long\n", file); | rc = sqlite3_open_v2(MANDOC_DB, &db, ofl, NULL); | ||
return(0); | if (SQLITE_OK != rc) { | ||
exitcode = (int)MANDOCLEVEL_SYSERR; | |||
say(MANDOC_DB, "%s", sqlite3_errmsg(db)); | |||
return(0); | |||
} | |||
goto prepare_statements; | |||
} | } | ||
if ( ! real) | ofl |= SQLITE_OPEN_CREATE | SQLITE_OPEN_EXCLUSIVE; | ||
remove(file); | |||
ofl = SQLITE_OPEN_READWRITE | | remove(MANDOC_DB "~"); | ||
(0 == real ? SQLITE_OPEN_EXCLUSIVE : 0); | rc = sqlite3_open_v2(MANDOC_DB "~", &db, ofl, NULL); | ||
rc = sqlite3_open_v2(file, &db, ofl, NULL); | |||
if (SQLITE_OK == rc) | if (SQLITE_OK == rc) | ||
return(1); | goto create_tables; | ||
if (SQLITE_CANTOPEN != rc) { | if (MPARSE_QUICK & mparse_options) { | ||
perror(file); | exitcode = (int)MANDOCLEVEL_SYSERR; | ||
say(MANDOC_DB "~", "%s", sqlite3_errmsg(db)); | |||
return(0); | return(0); | ||
} | } | ||
sqlite3_close(db); | if (strlcpy(tempfilename, "/tmp/mandocdb.XXXXXX", | ||
db = NULL; | sizeof(tempfilename)) >= sizeof(tempfilename)) { | ||
exitcode = (int)MANDOCLEVEL_SYSERR; | |||
if (SQLITE_OK != (rc = sqlite3_open(file, &db))) { | say("", "/tmp/mandocdb.XXXXXX: Filename too long"); | ||
perror(file); | |||
return(0); | return(0); | ||
} | } | ||
if (NULL == mkdtemp(tempfilename)) { | |||
exitcode = (int)MANDOCLEVEL_SYSERR; | |||
say("", "&%s", tempfilename); | |||
return(0); | |||
} | |||
if (strlcat(tempfilename, "/" MANDOC_DB, | |||
sizeof(tempfilename)) >= sizeof(tempfilename)) { | |||
exitcode = (int)MANDOCLEVEL_SYSERR; | |||
say("", "%s/" MANDOC_DB ": Filename too long", | |||
tempfilename); | |||
return(0); | |||
} | |||
rc = sqlite3_open_v2(tempfilename, &db, ofl, NULL); | |||
if (SQLITE_OK != rc) { | |||
exitcode = (int)MANDOCLEVEL_SYSERR; | |||
say("", "%s: %s", tempfilename, sqlite3_errmsg(db)); | |||
return(0); | |||
} | |||
sql = "CREATE TABLE \"docs\" (\n" | create_tables: | ||
" \"file\" TEXT NOT NULL,\n" | sql = "CREATE TABLE \"mpages\" (\n" | ||
" \"sec\" TEXT NOT NULL,\n" | |||
" \"arch\" TEXT NOT NULL,\n" | |||
" \"desc\" TEXT NOT NULL,\n" | |||
" \"form\" INTEGER NOT NULL,\n" | " \"form\" INTEGER NOT NULL,\n" | ||
" \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n" | " \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n" | ||
");\n" | ");\n" | ||
"\n" | "\n" | ||
"CREATE TABLE \"mlinks\" (\n" | |||
" \"sec\" TEXT NOT NULL,\n" | |||
" \"arch\" TEXT NOT NULL,\n" | |||
" \"name\" TEXT NOT NULL,\n" | |||
" \"pageid\" INTEGER NOT NULL REFERENCES mpages(id) " | |||
"ON DELETE CASCADE\n" | |||
");\n" | |||
"\n" | |||
"CREATE TABLE \"keys\" (\n" | "CREATE TABLE \"keys\" (\n" | ||
" \"bits\" INTEGER NOT NULL,\n" | " \"bits\" INTEGER NOT NULL,\n" | ||
" \"key\" TEXT NOT NULL,\n" | " \"key\" TEXT NOT NULL,\n" | ||
" \"docid\" INTEGER NOT NULL REFERENCES docs(id) " | " \"pageid\" INTEGER NOT NULL REFERENCES mpages(id) " | ||
"ON DELETE CASCADE,\n" | "ON DELETE CASCADE\n" | ||
" \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n" | ");\n"; | ||
");\n" | |||
"\n" | |||
"CREATE INDEX \"key_index\" ON keys (key);\n"; | |||
if (SQLITE_OK != sqlite3_exec(db, sql, NULL, NULL, NULL)) { | if (SQLITE_OK != sqlite3_exec(db, sql, NULL, NULL, NULL)) { | ||
perror(sqlite3_errmsg(db)); | exitcode = (int)MANDOCLEVEL_SYSERR; | ||
say(MANDOC_DB, "%s", sqlite3_errmsg(db)); | |||
return(0); | return(0); | ||
} | } | ||
sql = "DELETE FROM docs where file=?"; | prepare_statements: | ||
sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_DELETE], NULL); | SQL_EXEC("PRAGMA foreign_keys = ON"); | ||
sql = "INSERT INTO docs " | sql = "DELETE FROM mpages WHERE id IN " | ||
"(file,sec,arch,desc,form) VALUES (?,?,?,?,?)"; | "(SELECT pageid FROM mlinks WHERE " | ||
sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_DOC], NULL); | "sec=? AND arch=? AND name=?)"; | ||
sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_DELETE_PAGE], NULL); | |||
sql = "INSERT INTO mpages " | |||
"(form) VALUES (?)"; | |||
sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_PAGE], NULL); | |||
sql = "INSERT INTO mlinks " | |||
"(sec,arch,name,pageid) VALUES (?,?,?,?)"; | |||
sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_LINK], NULL); | |||
sql = "INSERT INTO keys " | sql = "INSERT INTO keys " | ||
"(bits,key,docid) VALUES (?,?,?)"; | "(bits,key,pageid) VALUES (?,?,?)"; | ||
sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_KEY], NULL); | sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_KEY], NULL); | ||
#ifndef __APPLE__ | |||
/* | |||
* When opening a new database, we can turn off | |||
* synchronous mode for much better performance. | |||
*/ | |||
if (real) | |||
SQL_EXEC("PRAGMA synchronous = OFF"); | |||
#endif | |||
return(1); | return(1); | ||
} | } | ||
|
|
||
} | } | ||
static int | static int | ||
path_reset(const char *cwd, int fd, const char *base) | set_basedir(const char *targetdir) | ||
{ | { | ||
static char startdir[PATH_MAX]; | |||
static int fd; | |||
if (-1 == fchdir(fd)) { | /* | ||
perror(cwd); | * Remember where we started by keeping a fd open to the origin | ||
* path component: throughout this utility, we chdir() a lot to | |||
* handle relative paths, and by doing this, we can return to | |||
* the starting point. | |||
*/ | |||
if ('\0' == *startdir) { | |||
if (NULL == getcwd(startdir, PATH_MAX)) { | |||
exitcode = (int)MANDOCLEVEL_SYSERR; | |||
if (NULL != targetdir) | |||
say("", "&getcwd"); | |||
return(0); | |||
} | |||
if (-1 == (fd = open(startdir, O_RDONLY, 0))) { | |||
exitcode = (int)MANDOCLEVEL_SYSERR; | |||
say("", "&open %s", startdir); | |||
return(0); | |||
} | |||
if (NULL == targetdir) | |||
targetdir = startdir; | |||
} else { | |||
if (-1 == fd) | |||
return(0); | |||
if (-1 == fchdir(fd)) { | |||
close(fd); | |||
basedir[0] = '\0'; | |||
exitcode = (int)MANDOCLEVEL_SYSERR; | |||
say("", "&chdir %s", startdir); | |||
return(0); | |||
} | |||
if (NULL == targetdir) { | |||
close(fd); | |||
return(1); | |||
} | |||
} | |||
if (NULL == realpath(targetdir, basedir)) { | |||
basedir[0] = '\0'; | |||
exitcode = (int)MANDOCLEVEL_BADARG; | |||
say("", "&%s: realpath", targetdir); | |||
return(0); | return(0); | ||
} else if (-1 == chdir(base)) { | } else if (-1 == chdir(basedir)) { | ||
perror(base); | exitcode = (int)MANDOCLEVEL_BADARG; | ||
say("", "&chdir"); | |||
return(0); | return(0); | ||
} | } | ||
return(1); | return(1); | ||
} | |||
static void | |||
say(const char *file, const char *format, ...) | |||
{ | |||
va_list ap; | |||
int use_errno; | |||
if ('\0' != *basedir) | |||
fprintf(stderr, "%s", basedir); | |||
if ('\0' != *basedir && '\0' != *file) | |||
fputs("//", stderr); | |||
if ('\0' != *file) | |||
fprintf(stderr, "%s", file); | |||
use_errno = 1; | |||
if (NULL != format) { | |||
switch (*format) { | |||
case ('&'): | |||
format++; | |||
break; | |||
case ('\0'): | |||
format = NULL; | |||
break; | |||
default: | |||
use_errno = 0; | |||
break; | |||
} | |||
} | |||
if (NULL != format) { | |||
if ('\0' != *basedir || '\0' != *file) | |||
fputs(": ", stderr); | |||
va_start(ap, format); | |||
vfprintf(stderr, format, ap); | |||
va_end(ap); | |||
} | |||
if (use_errno) { | |||
if ('\0' != *basedir || '\0' != *file || NULL != format) | |||
fputs(": ", stderr); | |||
perror(NULL); | |||
} else | |||
fputc('\n', stderr); | |||
} | } |