mandoc/mandocdb.c - diff

Return to mandocdb.c CVS log

Up to [cvsweb.bsd.lv] / mandoc

Diff for /mandoc/mandocdb.c between version 1.64 and 1.98

version 1.64, 2013/06/06 17:51:31

version 1.98, 2014/01/05 03:06:43

Line 1

/* $Id$ */

* Permission to use, copy, modify, and distribute this software for any

* purpose with or without fee is hereby granted, provided that the above

Line 76 enum op {

};

enum form {

FORM_SRC, /* format is -man or -mdoc */

FORM_NONE, /* format is unknown */

FORM_CAT, /* format is cat */

FORM_SRC, /* format is -man or -mdoc */

FORM_NONE /* format is unknown */

FORM_CAT /* format is cat */

};

struct str {

char *utf8; /* key in UTF-8 form */

char *rendered; /* key in UTF-8 or ASCII form */

const struct of *of; /* if set, the owning parse */

const struct mpage *mpage; /* if set, the owning parse */

struct str *next; /* next in owning parse sequence */

uint64_t mask; /* bitmask in sequence */

char key[]; /* the string itself */

char key[]; /* may contain escape sequences */

};

struct id {

struct inodev {

ino_t ino;

ino_t st_ino;

dev_t dev;

dev_t st_dev;

};

struct of {

struct mpage {

struct id id; /* used for hashing routine */

struct inodev inodev; /* used for hashing routine */

struct of *next; /* next in ofs */

enum form form; /* format from file content */

enum form dform; /* path-cued form */

char *sec; /* section from file content */

enum form sform; /* suffix-cued form */

char *arch; /* architecture from file content */

char *title; /* title from file content */

char *desc; /* description from file content */

struct mlink *mlinks; /* singly linked list */

};

struct mlink {

char file[PATH_MAX]; /* filename rel. to manpath */

const char *desc; /* parsed description */

enum form dform; /* format from directory */

const char *sec; /* suffix-cued section (or empty) */

enum form fform; /* format from file name suffix */

const char *dsec; /* path-cued section (or empty) */

char *dsec; /* section from directory */

const char *arch; /* path-cued arch. (or empty) */

char *arch; /* architecture from directory */

const char *name; /* name (from filename) (not empty) */

char *name; /* name from file name (not empty) */

char *fsec; /* section from file name suffix */

struct mlink *next; /* singly linked list */

};

/*
 * Record pairing the title a manual declares for itself with the
 * file it was read from.  mpages_merge() sets up a hash table of
 * these (title_table, keyed at offsetof(struct title, title)) when
 * its check_reachable argument is non-zero.
 * NOTE(review): who allocates and frees the two strings is not
 * visible in this part of the file -- confirm before relying on it.
 */
struct title {

char *title; /* name(sec/arch) given inside the file */

char *file; /* file name in case of mismatch */

};

enum stmt {

STMT_DELETE = 0, /* delete manpage */

STMT_DELETE_PAGE = 0, /* delete mpage */

STMT_INSERT_DOC, /* insert manpage */

STMT_INSERT_PAGE, /* insert mpage */

STMT_INSERT_KEY, /* insert parsed key */

STMT_INSERT_LINK, /* insert mlink */

STMT_INSERT_KEY, /* insert parsed key */

STMT__MAX

};

typedef int (*mdoc_fp)(struct of *, const struct mdoc_node *);

typedef int (*mdoc_fp)(struct mpage *, const struct mdoc_node *);

/*
 * One entry of the mdocs[] table, which holds MDOC_MAX entries and
 * labels each entry with the mdoc macro it belongs to.  An entry
 * names an optional callback, the TYPE_* key mask to index the
 * macro under, and behaviour flags.
 */
struct mdoc_handler {

mdoc_fp fp; /* optional handler */

uint64_t mask; /* set unless handler returns 0 */

int flags; /* for use by pmdoc_node */

/* Bit value stored in the flags member by the mdocs[] initialisers. */
#define MDOCF_CHILD 0x01 /* automatically index child nodes */

};

static void dbclose(int);

static void dbindex(struct mchars *, int, const struct of *);

static void dbindex(const struct mpage *, struct mchars *);

static int dbopen(int);

static void dbprune(void);

static void fileadd(struct of *);

static int filecheck(const char *);

static void filescan(const char *);

static struct str *hashget(const char *, size_t);

static void *hash_alloc(size_t, void *);

static void hash_free(void *, size_t, void *);

static void *hash_halloc(size_t, void *);

static void inoadd(const struct stat *, struct of *);

static void mlink_add(struct mlink *, const struct stat *);

static int inocheck(const struct stat *);

static int mlink_check(struct mpage *, struct mlink *);

static void ofadd(int, const char *, const char *, const char *,

static void mlink_free(struct mlink *);

const char *, const char *, const struct stat *);

static void mlinks_undupe(struct mpage *);

static void offree(void);

static void mpages_free(void);

static void ofmerge(struct mchars *, struct mparse *);

static void mpages_merge(struct mchars *, struct mparse *, int);

static void parse_catpage(struct of *);

static void parse_cat(struct mpage *);

static void parse_man(struct of *, const struct man_node *);

static void parse_man(struct mpage *, const struct man_node *);

static void parse_mdoc(struct of *, const struct mdoc_node *);

static void parse_mdoc(struct mpage *, const struct mdoc_node *);

static int parse_mdoc_body(struct of *, const struct mdoc_node *);

static int parse_mdoc_body(struct mpage *, const struct mdoc_node *);

static int parse_mdoc_head(struct of *, const struct mdoc_node *);

static int parse_mdoc_head(struct mpage *, const struct mdoc_node *);

static int parse_mdoc_Fd(struct of *, const struct mdoc_node *);

static int parse_mdoc_Fd(struct mpage *, const struct mdoc_node *);

static int parse_mdoc_Fn(struct of *, const struct mdoc_node *);

static int parse_mdoc_Fn(struct mpage *, const struct mdoc_node *);

static int parse_mdoc_In(struct of *, const struct mdoc_node *);

static int parse_mdoc_Nd(struct mpage *, const struct mdoc_node *);

static int parse_mdoc_Nd(struct of *, const struct mdoc_node *);

static int parse_mdoc_Nm(struct mpage *, const struct mdoc_node *);

static int parse_mdoc_Nm(struct of *, const struct mdoc_node *);

static int parse_mdoc_Sh(struct mpage *, const struct mdoc_node *);

static int parse_mdoc_Sh(struct of *, const struct mdoc_node *);

static int parse_mdoc_Xr(struct mpage *, const struct mdoc_node *);

static int parse_mdoc_St(struct of *, const struct mdoc_node *);

static void putkey(const struct mpage *,

static int parse_mdoc_Xr(struct of *, const struct mdoc_node *);

static int set_basedir(const char *);

static void putkey(const struct of *,

const char *, uint64_t);

static void putkeys(const struct of *,

static void putkeys(const struct mpage *,

const char *, size_t, uint64_t);

static void putmdockey(const struct of *,

static void putmdockey(const struct mpage *,

const struct mdoc_node *, uint64_t);

static void render_key(struct mchars *, struct str *);

static void say(const char *, const char *, ...);

static char *stradd(const char *);

static int set_basedir(const char *);

static char *stradds(const char *, size_t);

static int treescan(void);

static size_t utf8(unsigned int, char [7]);

static void utf8key(struct mchars *, struct str *);

static char *progname;

static int use_all; /* use all found files */

static int nodb; /* no database changes */

static int verb; /* print what we're doing */

static int warnings; /* warn about crap */

static int write_utf8; /* write UTF-8 output; else ASCII */

static int exitcode; /* to be returned by main */

static enum op op; /* operational mode */

static char basedir[PATH_MAX]; /* current base directory */

static struct ohash inos; /* table of inodes/devices */

static struct ohash mpages; /* table of distinct manual pages */

static struct ohash filenames; /* table of filenames */

static struct ohash mlinks; /* table of directory entries */

static struct ohash strings; /* table of all strings */

static struct of *ofs = NULL; /* vector of files to parse */

static struct str *words = NULL; /* word list in current parse */

static sqlite3 *db = NULL; /* current database */

static sqlite3_stmt *stmts[STMT__MAX]; /* current statements */

static const struct mdoc_handler mdocs[MDOC_MAX] = {

{ NULL, 0, 0 }, /* Ap */

{ NULL, 0 }, /* Ap */

{ NULL, 0, 0 }, /* Dd */

{ NULL, 0 }, /* Dd */

{ NULL, 0, 0 }, /* Dt */

{ NULL, 0 }, /* Dt */

{ NULL, 0, 0 }, /* Os */

{ NULL, 0 }, /* Os */

{ parse_mdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */

{ parse_mdoc_Sh, TYPE_Sh }, /* Sh */

{ parse_mdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */

{ parse_mdoc_head, TYPE_Ss }, /* Ss */

{ NULL, 0, 0 }, /* Pp */

{ NULL, 0 }, /* Pp */

{ NULL, 0, 0 }, /* D1 */

{ NULL, 0 }, /* D1 */

{ NULL, 0, 0 }, /* Dl */

{ NULL, 0 }, /* Dl */

{ NULL, 0, 0 }, /* Bd */

{ NULL, 0 }, /* Bd */

{ NULL, 0, 0 }, /* Ed */

{ NULL, 0 }, /* Ed */

{ NULL, 0, 0 }, /* Bl */

{ NULL, 0 }, /* Bl */

{ NULL, 0, 0 }, /* El */

{ NULL, 0 }, /* El */

{ NULL, 0, 0 }, /* It */

{ NULL, 0 }, /* It */

{ NULL, 0, 0 }, /* Ad */

{ NULL, 0 }, /* Ad */

{ NULL, TYPE_An, MDOCF_CHILD }, /* An */

{ NULL, TYPE_An }, /* An */

{ NULL, TYPE_Ar, MDOCF_CHILD }, /* Ar */

{ NULL, TYPE_Ar }, /* Ar */

{ NULL, TYPE_Cd, MDOCF_CHILD }, /* Cd */

{ NULL, TYPE_Cd }, /* Cd */

{ NULL, TYPE_Cm, MDOCF_CHILD }, /* Cm */

{ NULL, TYPE_Cm }, /* Cm */

{ NULL, TYPE_Dv, MDOCF_CHILD }, /* Dv */

{ NULL, TYPE_Dv }, /* Dv */

{ NULL, TYPE_Er, MDOCF_CHILD }, /* Er */

{ NULL, TYPE_Er }, /* Er */

{ NULL, TYPE_Ev, MDOCF_CHILD }, /* Ev */

{ NULL, TYPE_Ev }, /* Ev */

{ NULL, 0, 0 }, /* Ex */

{ NULL, 0 }, /* Ex */

{ NULL, TYPE_Fa, MDOCF_CHILD }, /* Fa */

{ NULL, TYPE_Fa }, /* Fa */

{ parse_mdoc_Fd, TYPE_In, 0 }, /* Fd */

{ parse_mdoc_Fd, 0 }, /* Fd */

{ NULL, TYPE_Fl, MDOCF_CHILD }, /* Fl */

{ NULL, TYPE_Fl }, /* Fl */

{ parse_mdoc_Fn, 0, 0 }, /* Fn */

{ parse_mdoc_Fn, 0 }, /* Fn */

{ NULL, TYPE_Ft, MDOCF_CHILD }, /* Ft */

{ NULL, TYPE_Ft }, /* Ft */

{ NULL, TYPE_Ic, MDOCF_CHILD }, /* Ic */

{ NULL, TYPE_Ic }, /* Ic */

{ parse_mdoc_In, TYPE_In, MDOCF_CHILD }, /* In */

{ NULL, TYPE_In }, /* In */

{ NULL, TYPE_Li, MDOCF_CHILD }, /* Li */

{ NULL, TYPE_Li }, /* Li */

{ parse_mdoc_Nd, TYPE_Nd, MDOCF_CHILD }, /* Nd */

{ parse_mdoc_Nd, TYPE_Nd }, /* Nd */

{ parse_mdoc_Nm, TYPE_Nm, MDOCF_CHILD }, /* Nm */

{ parse_mdoc_Nm, TYPE_Nm }, /* Nm */

{ NULL, 0, 0 }, /* Op */

{ NULL, 0 }, /* Op */

{ NULL, 0, 0 }, /* Ot */

{ NULL, 0 }, /* Ot */

{ NULL, TYPE_Pa, MDOCF_CHILD }, /* Pa */

{ NULL, TYPE_Pa }, /* Pa */

{ NULL, 0, 0 }, /* Rv */

{ NULL, 0 }, /* Rv */

{ parse_mdoc_St, TYPE_St, 0 }, /* St */

{ NULL, TYPE_St }, /* St */

{ NULL, TYPE_Va, MDOCF_CHILD }, /* Va */

{ NULL, TYPE_Va }, /* Va */

{ parse_mdoc_body, TYPE_Va, MDOCF_CHILD }, /* Vt */

{ parse_mdoc_body, TYPE_Va }, /* Vt */

{ parse_mdoc_Xr, TYPE_Xr, 0 }, /* Xr */

{ parse_mdoc_Xr, 0 }, /* Xr */

{ NULL, 0, 0 }, /* %A */

{ NULL, 0 }, /* %A */

{ NULL, 0, 0 }, /* %B */

{ NULL, 0 }, /* %B */

{ NULL, 0, 0 }, /* %D */

{ NULL, 0 }, /* %D */

{ NULL, 0, 0 }, /* %I */

{ NULL, 0 }, /* %I */

{ NULL, 0, 0 }, /* %J */

{ NULL, 0 }, /* %J */

{ NULL, 0, 0 }, /* %N */

{ NULL, 0 }, /* %N */

{ NULL, 0, 0 }, /* %O */

{ NULL, 0 }, /* %O */

{ NULL, 0, 0 }, /* %P */

{ NULL, 0 }, /* %P */

{ NULL, 0, 0 }, /* %R */

{ NULL, 0 }, /* %R */

{ NULL, 0, 0 }, /* %T */

{ NULL, 0 }, /* %T */

{ NULL, 0, 0 }, /* %V */

{ NULL, 0 }, /* %V */

{ NULL, 0, 0 }, /* Ac */

{ NULL, 0 }, /* Ac */

{ NULL, 0, 0 }, /* Ao */

{ NULL, 0 }, /* Ao */

{ NULL, 0, 0 }, /* Aq */

{ NULL, 0 }, /* Aq */

{ NULL, TYPE_At, MDOCF_CHILD }, /* At */

{ NULL, TYPE_At }, /* At */

{ NULL, 0, 0 }, /* Bc */

{ NULL, 0 }, /* Bc */

{ NULL, 0, 0 }, /* Bf */

{ NULL, 0 }, /* Bf */

{ NULL, 0, 0 }, /* Bo */

{ NULL, 0 }, /* Bo */

{ NULL, 0, 0 }, /* Bq */

{ NULL, 0 }, /* Bq */

{ NULL, TYPE_Bsx, MDOCF_CHILD }, /* Bsx */

{ NULL, TYPE_Bsx }, /* Bsx */

{ NULL, TYPE_Bx, MDOCF_CHILD }, /* Bx */

{ NULL, TYPE_Bx }, /* Bx */

{ NULL, 0, 0 }, /* Db */

{ NULL, 0 }, /* Db */

{ NULL, 0, 0 }, /* Dc */

{ NULL, 0 }, /* Dc */

{ NULL, 0, 0 }, /* Do */

{ NULL, 0 }, /* Do */

{ NULL, 0, 0 }, /* Dq */

{ NULL, 0 }, /* Dq */

{ NULL, 0, 0 }, /* Ec */

{ NULL, 0 }, /* Ec */

{ NULL, 0, 0 }, /* Ef */

{ NULL, 0 }, /* Ef */

{ NULL, TYPE_Em, MDOCF_CHILD }, /* Em */

{ NULL, TYPE_Em }, /* Em */

{ NULL, 0, 0 }, /* Eo */

{ NULL, 0 }, /* Eo */

{ NULL, TYPE_Fx, MDOCF_CHILD }, /* Fx */

{ NULL, TYPE_Fx }, /* Fx */

{ NULL, TYPE_Ms, MDOCF_CHILD }, /* Ms */

{ NULL, TYPE_Ms }, /* Ms */

{ NULL, 0, 0 }, /* No */

{ NULL, 0 }, /* No */

{ NULL, 0, 0 }, /* Ns */

{ NULL, 0 }, /* Ns */

{ NULL, TYPE_Nx, MDOCF_CHILD }, /* Nx */

{ NULL, TYPE_Nx }, /* Nx */

{ NULL, TYPE_Ox, MDOCF_CHILD }, /* Ox */

{ NULL, TYPE_Ox }, /* Ox */

{ NULL, 0, 0 }, /* Pc */

{ NULL, 0 }, /* Pc */

{ NULL, 0, 0 }, /* Pf */

{ NULL, 0 }, /* Pf */

{ NULL, 0, 0 }, /* Po */

{ NULL, 0 }, /* Po */

{ NULL, 0, 0 }, /* Pq */

{ NULL, 0 }, /* Pq */

{ NULL, 0, 0 }, /* Qc */

{ NULL, 0 }, /* Qc */

{ NULL, 0, 0 }, /* Ql */

{ NULL, 0 }, /* Ql */

{ NULL, 0, 0 }, /* Qo */

{ NULL, 0 }, /* Qo */

{ NULL, 0, 0 }, /* Qq */

{ NULL, 0 }, /* Qq */

{ NULL, 0, 0 }, /* Re */

{ NULL, 0 }, /* Re */

{ NULL, 0, 0 }, /* Rs */

{ NULL, 0 }, /* Rs */

{ NULL, 0, 0 }, /* Sc */

{ NULL, 0 }, /* Sc */

{ NULL, 0, 0 }, /* So */

{ NULL, 0 }, /* So */

{ NULL, 0, 0 }, /* Sq */

{ NULL, 0 }, /* Sq */

{ NULL, 0, 0 }, /* Sm */

{ NULL, 0 }, /* Sm */

{ NULL, 0, 0 }, /* Sx */

{ NULL, 0 }, /* Sx */

{ NULL, TYPE_Sy, MDOCF_CHILD }, /* Sy */

{ NULL, TYPE_Sy }, /* Sy */

{ NULL, TYPE_Tn, MDOCF_CHILD }, /* Tn */

{ NULL, TYPE_Tn }, /* Tn */

{ NULL, 0, 0 }, /* Ux */

{ NULL, 0 }, /* Ux */

{ NULL, 0, 0 }, /* Xc */

{ NULL, 0 }, /* Xc */

{ NULL, 0, 0 }, /* Xo */

{ NULL, 0 }, /* Xo */

{ parse_mdoc_head, TYPE_Fn, 0 }, /* Fo */

{ parse_mdoc_head, 0 }, /* Fo */

{ NULL, 0, 0 }, /* Fc */

{ NULL, 0 }, /* Fc */

{ NULL, 0, 0 }, /* Oo */

{ NULL, 0 }, /* Oo */

{ NULL, 0, 0 }, /* Oc */

{ NULL, 0 }, /* Oc */

{ NULL, 0, 0 }, /* Bk */

{ NULL, 0 }, /* Bk */

{ NULL, 0, 0 }, /* Ek */

{ NULL, 0 }, /* Ek */

{ NULL, 0, 0 }, /* Bt */

{ NULL, 0 }, /* Bt */

{ NULL, 0, 0 }, /* Hf */

{ NULL, 0 }, /* Hf */

{ NULL, 0, 0 }, /* Fr */

{ NULL, 0 }, /* Fr */

{ NULL, 0, 0 }, /* Ud */

{ NULL, 0 }, /* Ud */

{ NULL, TYPE_Lb, MDOCF_CHILD }, /* Lb */

{ NULL, TYPE_Lb }, /* Lb */

{ NULL, 0, 0 }, /* Lp */

{ NULL, 0 }, /* Lp */

{ NULL, TYPE_Lk, MDOCF_CHILD }, /* Lk */

{ NULL, TYPE_Lk }, /* Lk */

{ NULL, TYPE_Mt, MDOCF_CHILD }, /* Mt */

{ NULL, TYPE_Mt }, /* Mt */

{ NULL, 0, 0 }, /* Brq */

{ NULL, 0 }, /* Brq */

{ NULL, 0, 0 }, /* Bro */

{ NULL, 0 }, /* Bro */

{ NULL, 0, 0 }, /* Brc */

{ NULL, 0 }, /* Brc */

{ NULL, 0, 0 }, /* %C */

{ NULL, 0 }, /* %C */

{ NULL, 0, 0 }, /* Es */

{ NULL, 0 }, /* Es */

{ NULL, 0, 0 }, /* En */

{ NULL, 0 }, /* En */

{ NULL, TYPE_Dx, MDOCF_CHILD }, /* Dx */

{ NULL, TYPE_Dx }, /* Dx */

{ NULL, 0, 0 }, /* %Q */

{ NULL, 0 }, /* %Q */

{ NULL, 0, 0 }, /* br */

{ NULL, 0 }, /* br */

{ NULL, 0, 0 }, /* sp */

{ NULL, 0 }, /* sp */

{ NULL, 0, 0 }, /* %U */

{ NULL, 0 }, /* %U */

{ NULL, 0, 0 }, /* Ta */

{ NULL, 0 }, /* Ta */

};

int

main(int argc, char *argv[])

{

int ch, i;

unsigned int index;

size_t j, sz;

const char *path_arg;

struct str *s;

struct mchars *mc;

struct manpaths dirs;

struct mparse *mp;

struct ohash_info ino_info, filename_info, str_info;

struct ohash_info mpages_info, mlinks_info;

memset(stmts, 0, STMT__MAX * sizeof(sqlite3_stmt *));

memset(&dirs, 0, sizeof(struct manpaths));

ino_info.halloc = filename_info.halloc =

mpages_info.alloc = mlinks_info.alloc = hash_alloc;

str_info.halloc = hash_halloc;

mpages_info.halloc = mlinks_info.halloc = hash_halloc;

ino_info.hfree = filename_info.hfree =

mpages_info.hfree = mlinks_info.hfree = hash_free;

str_info.hfree = hash_free;

ino_info.alloc = filename_info.alloc =

str_info.alloc = hash_alloc;

ino_info.key_offset = offsetof(struct of, id);

mpages_info.key_offset = offsetof(struct mpage, inodev);

filename_info.key_offset = offsetof(struct of, file);

mlinks_info.key_offset = offsetof(struct mlink, file);

str_info.key_offset = offsetof(struct str, key);

progname = strrchr(argv[0], '/');

if (progname == NULL)

Line 355 main(int argc, char *argv[])

Line 352 main(int argc, char *argv[])

path_arg = NULL;

op = OP_DEFAULT;

while (-1 != (ch = getopt(argc, argv, "aC:d:ntu:vW")))

while (-1 != (ch = getopt(argc, argv, "aC:d:nT:tu:vW")))

switch (ch) {

case ('a'):

use_all = 1;

Line 373 main(int argc, char *argv[])

Line 370 main(int argc, char *argv[])

case ('n'):

nodb = 1;

break;

case ('T'):

if (strcmp(optarg, "utf8")) {

fprintf(stderr, "-T%s: Unsupported "

"output format\n", optarg);

goto usage;

}

write_utf8 = 1;

break;

case ('t'):

CHECKOP(op, ch);

dup2(STDOUT_FILENO, STDERR_FILENO);

Line 407 main(int argc, char *argv[])

Line 412 main(int argc, char *argv[])

MANDOCLEVEL_FATAL, NULL, NULL, NULL);

mc = mchars_alloc();

ohash_init(&strings, 6, &str_info);

ohash_init(&mpages, 6, &mpages_info);

ohash_init(&inos, 6, &ino_info);

ohash_init(&mlinks, 6, &mlinks_info);

ohash_init(&filenames, 6, &filename_info);

if (OP_UPDATE == op || OP_DELETE == op || OP_TEST == op) {

Line 431 main(int argc, char *argv[])

Line 435 main(int argc, char *argv[])

if (OP_TEST != op)

dbprune();

if (OP_DELETE != op)

ofmerge(mc, mp);

mpages_merge(mc, mp, 0);

dbclose(1);

} else {

Line 449 main(int argc, char *argv[])

Line 453 main(int argc, char *argv[])

manpath_parse(&dirs, path_arg, NULL, NULL);

* First scan the tree rooted at a base directory.

* First scan the tree rooted at a base directory, then

* Then whack its database (if one exists), parse, and

* build a new database and finally move it into place.

* build up the database.

* Ignore zero-length directories and strip trailing

* slashes.

Line 461 main(int argc, char *argv[])

Line 464 main(int argc, char *argv[])

dirs.paths[j][--sz] = '\0';

if (0 == sz)

continue;

if (j) {

ohash_init(&mpages, 6, &mpages_info);

ohash_init(&mlinks, 6, &mlinks_info);

}

if (0 == set_basedir(dirs.paths[j]))

goto out;

if (0 == treescan())

Line 470 main(int argc, char *argv[])

Line 479 main(int argc, char *argv[])

if (0 == dbopen(0))

goto out;

mpages_merge(mc, mp, warnings && !use_all);

* Since we're opening up a new database, we can

* turn off synchronous mode for much better

* performance.

#ifndef __APPLE__

SQL_EXEC("PRAGMA synchronous = OFF");

#endif

ofmerge(mc, mp);

dbclose(0);

offree();

ohash_delete(&inos);

if (j + 1 < dirs.sz) {

ohash_init(&inos, 6, &ino_info);

mpages_free();

ohash_delete(&filenames);

ohash_delete(&mpages);

ohash_init(&filenames, 6, &filename_info);

ohash_delete(&mlinks);

}

out:

Line 493 out:

Line 494 out:

manpath_free(&dirs);

mchars_free(mc);

mparse_free(mp);

for (s = ohash_first(&strings, &index);

mpages_free();

NULL != s; s = ohash_next(&strings, &index)) {

ohash_delete(&mpages);

if (s->utf8 != s->key)

ohash_delete(&mlinks);

free(s->utf8);

free(s);

}

ohash_delete(&strings);

ohash_delete(&inos);

ohash_delete(&filenames);

offree();

return(exitcode);

usage:

fprintf(stderr, "usage: %s [-anvW] [-C file]\n"

fprintf(stderr, "usage: %s [-anvW] [-C file] [-Tutf8]\n"

" %s [-anvW] dir ...\n"

" %s [-anvW] [-Tutf8] dir ...\n"

" %s [-nvW] -d dir [file ...]\n"

" %s [-nvW] [-Tutf8] -d dir [file ...]\n"

" %s [-nvW] -u dir [file ...]\n"

" %s -t file ...\n",

progname, progname, progname,

Line 535 treescan(void)

Line 529 treescan(void)

{

FTS *f;

FTSENT *ff;

struct mlink *mlink;

int dform;

char *sec;

char *dsec, *arch, *fsec, *cp;

const char *dsec, *arch, *cp, *name, *path;

const char *path;

const char *argv[2];

argv[0] = ".";

Line 560 treescan(void)

Line 555 treescan(void)

while (NULL != (ff = fts_read(f))) {

path = ff->fts_path + 2;

* If we're a regular file, add an "of" by using the

* If we're a regular file, add an mlink by using the

* stored directory data and handling the filename.

* Disallow duplicate (hard-linked) files.

if (FTS_F == ff->fts_info) {

if (0 == strcmp(path, MANDOC_DB))

Line 571 treescan(void)

Line 565 treescan(void)

if (warnings)

say(path, "Extraneous file");

continue;

} else if (inocheck(ff->fts_statp)) {

} else if (NULL == (fsec =

if (warnings)

say(path, "Duplicate file");

continue;

} else if (NULL == (sec =

strrchr(ff->fts_name, '.'))) {

if ( ! use_all) {

if (warnings)

Line 583 treescan(void)

Line 573 treescan(void)

"No filename suffix");

continue;

}

} else if (0 == strcmp(++sec, "html")) {

} else if (0 == strcmp(++fsec, "html")) {

if (warnings)

say(path, "Skip html");

continue;

} else if (0 == strcmp(sec, "gz")) {

} else if (0 == strcmp(fsec, "gz")) {

if (warnings)

say(path, "Skip gz");

continue;

} else if (0 == strcmp(sec, "ps")) {

} else if (0 == strcmp(fsec, "ps")) {

if (warnings)

say(path, "Skip ps");

continue;

} else if (0 == strcmp(sec, "pdf")) {

} else if (0 == strcmp(fsec, "pdf")) {

if (warnings)

say(path, "Skip pdf");

continue;

} else if ( ! use_all &&

((FORM_SRC == dform && strcmp(sec, dsec)) ||

((FORM_SRC == dform && strcmp(fsec, dsec)) ||

(FORM_CAT == dform && strcmp(sec, "0")))) {

(FORM_CAT == dform && strcmp(fsec, "0")))) {

if (warnings)

say(path, "Wrong filename suffix");

continue;

} else {

} else

sec[-1] = '\0';

fsec[-1] = '\0';

sec = stradd(sec);

}

mlink = mandoc_calloc(1, sizeof(struct mlink));

name = stradd(ff->fts_name);

strlcpy(mlink->file, path, sizeof(mlink->file));

ofadd(dform, path,

mlink->dform = dform;

name, dsec, sec, arch, ff->fts_statp);

mlink->dsec = dsec;

mlink->arch = arch;

mlink->name = ff->fts_name;

mlink->fsec = fsec;

mlink_add(mlink, ff->fts_statp);

continue;

} else if (FTS_D != ff->fts_info &&

FTS_DP != ff->fts_info) {

if (warnings)

say(path, "Not a regular file");

Line 630 treescan(void)

Line 624 treescan(void)

* Try to infer this from the name.

* If we're not in use_all, enforce it.

dsec = NULL;

dform = FORM_NONE;

cp = ff->fts_name;

if (FTS_DP == ff->fts_info)

break;

if (0 == strncmp(cp, "man", 3)) {

dform = FORM_SRC;

dsec = stradd(cp + 3);

dsec = cp + 3;

} else if (0 == strncmp(cp, "cat", 3)) {

dform = FORM_CAT;

dsec = stradd(cp + 3);

dsec = cp + 3;

} else {

dform = FORM_NONE;

dsec = NULL;

}

if (NULL != dsec || use_all)

Line 656 treescan(void)

Line 651 treescan(void)

* Possibly our architecture.

* If we're descending, keep tabs on it.

arch = NULL;

if (FTS_DP != ff->fts_info && NULL != dsec)

arch = stradd(ff->fts_name);

arch = ff->fts_name;

else

arch = NULL;

break;

default:

if (FTS_DP == ff->fts_info || use_all)

Line 675 treescan(void)

Line 671 treescan(void)

}

* Add a file to the file vector.

* Add a file to the mlinks table.

* Do not verify that it's a "valid" looking manpage (we'll do that

* later).

Line 686 treescan(void)

Line 682 treescan(void)

* or

* [./]cat<section>[/<arch>]/<name>.0

* Stuff this information directly into the "of" vector.

* See treescan() for the fts(3) version of this.

static void

filescan(const char *file)

{

char buf[PATH_MAX];

const char *sec, *arch, *name, *dsec;

char *p, *start;

int dform;

struct stat st;

struct mlink *mlink;

char *p, *start;

assert(use_all);

Line 707 filescan(const char *file)

Line 701 filescan(const char *file)

exitcode = (int)MANDOCLEVEL_BADARG;

say(file, NULL);

return;

} else if (strstr(buf, basedir) != buf) {

} else if (OP_TEST != op && strstr(buf, basedir) != buf) {

exitcode = (int)MANDOCLEVEL_BADARG;

say("", "%s: outside base directory", buf);

return;

Line 719 filescan(const char *file)

Line 713 filescan(const char *file)

exitcode = (int)MANDOCLEVEL_BADARG;

say(file, "Not a regular file");

return;

} else if (inocheck(&st)) {

if (warnings)

say(file, "Duplicate file");

return;

}

start = buf + strlen(basedir);

sec = arch = name = dsec = NULL;

mlink = mandoc_calloc(1, sizeof(struct mlink));

dform = FORM_NONE;

strlcpy(mlink->file, start, sizeof(mlink->file));

* First try to guess our directory structure.

Line 737 filescan(const char *file)

Line 727 filescan(const char *file)

if (NULL != (p = strchr(start, '/'))) {

*p++ = '\0';

if (0 == strncmp(start, "man", 3)) {

dform = FORM_SRC;

mlink->dform = FORM_SRC;

dsec = start + 3;

mlink->dsec = start + 3;

} else if (0 == strncmp(start, "cat", 3)) {

dform = FORM_CAT;

mlink->dform = FORM_CAT;

dsec = start + 3;

mlink->dsec = start + 3;

}

start = p;

if (NULL != dsec && NULL != (p = strchr(start, '/'))) {

if (NULL != mlink->dsec && NULL != (p = strchr(start, '/'))) {

*p++ = '\0';

arch = start;

mlink->arch = start;

start = p;

}

Line 762 filescan(const char *file)

Line 752 filescan(const char *file)

if ('.' == *p) {

*p++ = '\0';

sec = p;

mlink->fsec = p;

}

* Now try to parse the name.

* Use the filename portion of the path.

name = start;

mlink->name = start;

if (NULL != (p = strrchr(start, '/'))) {

name = p + 1;

mlink->name = p + 1;

*p = '\0';

}

mlink_add(mlink, &st);

ofadd(dform, file, name, dsec, sec, arch, &st);

}

static void

* See fileadd().

mlink_add(struct mlink *mlink, const struct stat *st)

static int

filecheck(const char *name)

{

unsigned int index;

struct inodev inodev;

struct mpage *mpage;

unsigned int slot;

index = ohash_qlookup(&filenames, name);

assert(NULL != mlink->file);

return(NULL != ohash_find(&filenames, index));

mlink->dsec = mandoc_strdup(mlink->dsec ? mlink->dsec : "");

mlink->arch = mandoc_strdup(mlink->arch ? mlink->arch : "");

mlink->name = mandoc_strdup(mlink->name ? mlink->name : "");

mlink->fsec = mandoc_strdup(mlink->fsec ? mlink->fsec : "");

if ('0' == *mlink->fsec) {

free(mlink->fsec);

mlink->fsec = mandoc_strdup(mlink->dsec);

mlink->fform = FORM_CAT;

} else if ('1' <= *mlink->fsec && '9' >= *mlink->fsec)

mlink->fform = FORM_SRC;

else

mlink->fform = FORM_NONE;

slot = ohash_qlookup(&mlinks, mlink->file);

assert(NULL == ohash_find(&mlinks, slot));

ohash_insert(&mlinks, slot, mlink);

inodev.st_ino = st->st_ino;

inodev.st_dev = st->st_dev;

slot = ohash_lookup_memory(&mpages, (char *)&inodev,

sizeof(struct inodev), inodev.st_ino);

mpage = ohash_find(&mpages, slot);

if (NULL == mpage) {

mpage = mandoc_calloc(1, sizeof(struct mpage));

mpage->inodev.st_ino = inodev.st_ino;

mpage->inodev.st_dev = inodev.st_dev;

ohash_insert(&mpages, slot, mpage);

} else

mlink->next = mpage->mlinks;

mpage->mlinks = mlink;

}

* Use the standard hashing mechanism (K&R) to see if the given filename

* already exists.

static void

fileadd(struct of *of)

mlink_free(struct mlink *mlink)

{

unsigned int index;

index = ohash_qlookup(&filenames, of->file);

free(mlink->dsec);

assert(NULL == ohash_find(&filenames, index));

free(mlink->arch);

ohash_insert(&filenames, index, of);

free(mlink->name);

free(mlink->fsec);

free(mlink);

}

static void

* See inoadd().

mpages_free(void)

static int

inocheck(const struct stat *st)

{

struct id id;

struct mpage *mpage;

uint32_t hash;

struct mlink *mlink;

unsigned int index;

unsigned int slot;

memset(&id, 0, sizeof(id));

mpage = ohash_first(&mpages, &slot);

id.ino = hash = st->st_ino;

while (NULL != mpage) {

id.dev = st->st_dev;

while (NULL != (mlink = mpage->mlinks)) {

index = ohash_lookup_memory

mpage->mlinks = mlink->next;

(&inos, (char *)&id, sizeof(id), hash);

mlink_free(mlink);

}

return(NULL != ohash_find(&inos, index));

free(mpage->sec);

free(mpage->arch);

free(mpage->title);

free(mpage->desc);

free(mpage);

mpage = ohash_next(&mpages, &slot);

}

* The hashing function used here is quite simple: simply take the inode

* For each mlink to the mpage, check whether the path looks like

* and use uint32_t of its bits.

* it is formatted, and if it does, check whether a source manual

* Then when we do the lookup, use both the inode and device identifier.

* exists by the same name, ignoring the suffix.

* If both conditions hold, drop the mlink.

static void

inoadd(const struct stat *st, struct of *of)

mlinks_undupe(struct mpage *mpage)

{

uint32_t hash;

char buf[PATH_MAX];

unsigned int index;

struct mlink **prev;

struct mlink *mlink;

char *bufp;

of->id.ino = hash = st->st_ino;

mpage->form = FORM_CAT;

of->id.dev = st->st_dev;

prev = &mpage->mlinks;

index = ohash_lookup_memory

while (NULL != (mlink = *prev)) {

(&inos, (char *)&of->id, sizeof(of->id), hash);

if (FORM_CAT != mlink->dform) {

mpage->form = FORM_NONE;

assert(NULL == ohash_find(&inos, index));

goto nextlink;

ohash_insert(&inos, index, of);

}

if (strlcpy(buf, mlink->file, PATH_MAX) >= PATH_MAX) {

if (warnings)

say(mlink->file, "Filename too long");

goto nextlink;

}

bufp = strstr(buf, "cat");

assert(NULL != bufp);

memcpy(bufp, "man", 3);

if (NULL != (bufp = strrchr(buf, '.')))

*++bufp = '\0';

strlcat(buf, mlink->dsec, PATH_MAX);

if (NULL == ohash_find(&mlinks,

ohash_qlookup(&mlinks, buf)))

goto nextlink;

if (warnings)

say(mlink->file, "Man source exists: %s", buf);

if (use_all)

goto nextlink;

*prev = mlink->next;

mlink_free(mlink);

continue;

nextlink:

prev = &(*prev)->next;

}

static void

static int

ofadd(int dform, const char *file, const char *name, const char *dsec,

mlink_check(struct mpage *mpage, struct mlink *mlink)

const char *sec, const char *arch, const struct stat *st)

{

struct of *of;

int match;

int sform;

assert(NULL != file);

match = 1;

if (NULL == name)

name = "";

* Check whether the manual section given in a file

if (NULL == sec)

* agrees with the directory where the file is located.

sec = "";

* Some manuals have suffixes like (3p) on their

if (NULL == dsec)

* section number either inside the file or in the

dsec = "";

* directory name, some are linked into more than one

if (NULL == arch)

* section, like encrypt(1) = makekey(8).

arch = "";

sform = FORM_NONE;

if (FORM_SRC == mpage->form &&

if (NULL != sec && *sec <= '9' && *sec >= '1')

strcasecmp(mpage->sec, mlink->dsec)) {

sform = FORM_SRC;

match = 0;

else if (NULL != sec && *sec == '0') {

say(mlink->file, "Section \"%s\" manual in %s directory",

sec = dsec;

mpage->sec, mlink->dsec);

sform = FORM_CAT;

}

of = mandoc_calloc(1, sizeof(struct of));

strlcpy(of->file, file, PATH_MAX);

of->name = name;

of->sec = sec;

of->dsec = dsec;

of->arch = arch;

of->sform = sform;

of->dform = dform;

of->next = ofs;

ofs = of;

* Add to unique identifier hash.

* Manual page directories exist for each kernel

* Then if it's a source manual and we're going to use source in

* architecture as returned by machine(1).

* favour of catpages, add it to that hash.

* However, many manuals only depend on the

* application architecture as returned by arch(1).

* For example, some (2/ARM) manuals are shared

* across the "armish" and "zaurus" kernel

* architectures.

* A few manuals are even shared across completely

* different architectures, for example fdformat(1)

* on amd64, i386, sparc, and sparc64.

inoadd(st, of);

fileadd(of);

}

static void

if (strcasecmp(mpage->arch, mlink->arch)) {

offree(void)

match = 0;

{

say(mlink->file, "Architecture \"%s\" manual in "

struct of *of;

"\"%s\" directory", mpage->arch, mlink->arch);

while (NULL != (of = ofs)) {

ofs = of->next;

free(of);

}

if (strcasecmp(mpage->title, mlink->name))

match = 0;

return(match);

}

* Run through the files in the global vector "ofs" and add them to the

* Run through the files in the global vector "mpages"

* database specified in "basedir".

* and add them to the database specified in "basedir".

* This handles the parsing scheme itself, using the cues of directory

* and filename to determine whether the file is parsable or not.

static void

ofmerge(struct mchars *mc, struct mparse *mp)

mpages_merge(struct mchars *mc, struct mparse *mp, int check_reachable)

{

int form;

struct ohash title_table;

size_t sz;

struct ohash_info title_info, str_info;

struct mdoc *mdoc;

struct mpage *mpage;

struct man *man;

struct mlink *mlink;

char buf[PATH_MAX];

struct mdoc *mdoc;

char *bufp;

struct man *man;

const char *msec, *march, *mtitle, *cp;

struct title *title_entry;

struct of *of;

char *title_str;

enum mandoclevel lvl;

const char *cp;

int match;

unsigned int pslot, tslot;

enum mandoclevel lvl;

for (of = ofs; NULL != of; of = of->next) {

str_info.alloc = hash_alloc;

str_info.halloc = hash_halloc;

* If we're a catpage (as defined by our path), then see

str_info.hfree = hash_free;

* if a manpage exists by the same name (ignoring the

str_info.key_offset = offsetof(struct str, key);

* suffix).

* If it does, then we want to use it instead of our

if (check_reachable) {

* own.

title_info.alloc = hash_alloc;

title_info.halloc = hash_halloc;

if ( ! use_all && FORM_CAT == of->dform) {

title_info.hfree = hash_free;

sz = strlcpy(buf, of->file, PATH_MAX);

title_info.key_offset = offsetof(struct title, title);

if (sz >= PATH_MAX) {

ohash_init(&title_table, 6, &title_info);

if (warnings)

}

say(of->file, "Filename too long");

continue;

mpage = ohash_first(&mpages, &pslot);

}

while (NULL != mpage) {

bufp = strstr(buf, "cat");

mlinks_undupe(mpage);

assert(NULL != bufp);

if (NULL == mpage->mlinks) {

memcpy(bufp, "man", 3);

mpage = ohash_next(&mpages, &pslot);

if (NULL != (bufp = strrchr(buf, '.')))

continue;

*++bufp = '\0';

strlcat(buf, of->dsec, PATH_MAX);

if (filecheck(buf)) {

if (warnings)

say(of->file, "Man "

"source exists: %s", buf);

continue;

}

words = NULL;

ohash_init(&strings, 6, &str_info);

mparse_reset(mp);

mdoc = NULL;

man = NULL;

form = 0;

msec = of->dsec;

march = of->arch;

mtitle = of->name;

* Try interpreting the file as mdoc(7) or man(7)

* source code, unless it is already known to be

* formatted. Fall back to formatted mode.

if (FORM_SRC == of->dform || FORM_SRC == of->sform) {

if (FORM_CAT != mpage->mlinks->dform ||

lvl = mparse_readfd(mp, -1, of->file);

FORM_CAT != mpage->mlinks->fform) {

lvl = mparse_readfd(mp, -1, mpage->mlinks->file);

if (lvl < MANDOCLEVEL_FATAL)

mparse_result(mp, &mdoc, &man);

}

if (NULL != mdoc) {

form = 1;

mpage->form = FORM_SRC;

msec = mdoc_meta(mdoc)->msec;

mpage->sec =

march = mdoc_meta(mdoc)->arch;

mandoc_strdup(mdoc_meta(mdoc)->msec);

mtitle = mdoc_meta(mdoc)->title;

mpage->arch = mdoc_meta(mdoc)->arch;

mpage->arch = mandoc_strdup(

NULL == mpage->arch ? "" : mpage->arch);

mpage->title =

mandoc_strdup(mdoc_meta(mdoc)->title);

} else if (NULL != man) {

form = 1;

mpage->form = FORM_SRC;

msec = man_meta(man)->msec;

mpage->sec =

march = "";

mandoc_strdup(man_meta(man)->msec);

mtitle = man_meta(man)->title;

mpage->arch =

}

mandoc_strdup(mpage->mlinks->arch);

mpage->title =

mandoc_strdup(man_meta(man)->title);

} else {

mpage->form = FORM_CAT;

mpage->sec =

mandoc_strdup(mpage->mlinks->dsec);

mpage->arch =

mandoc_strdup(mpage->mlinks->arch);

mpage->title =

mandoc_strdup(mpage->mlinks->name);

}

putkey(mpage, mpage->sec, TYPE_sec);

putkey(mpage, '\0' == *mpage->arch ?

"any" : mpage->arch, TYPE_arch);

if (NULL == msec)

for (mlink = mpage->mlinks; mlink; mlink = mlink->next) {

msec = "";

if ('\0' != *mlink->dsec)

if (NULL == march)

putkey(mpage, mlink->dsec, TYPE_sec);

march = "";

if ('\0' != *mlink->fsec)

if (NULL == mtitle)

putkey(mpage, mlink->fsec, TYPE_sec);

mtitle = "";

putkey(mpage, '\0' == *mlink->arch ?

"any" : mlink->arch, TYPE_arch);

putkey(mpage, mlink->name, TYPE_Nm);

}

if (warnings && !use_all) {

* Check whether the manual section given in a file

match = 0;

* agrees with the directory where the file is located.

for (mlink = mpage->mlinks; mlink;

* Some manuals have suffixes like (3p) on their

mlink = mlink->next)

* section number either inside the file or in the

if (mlink_check(mpage, mlink))

* directory name, some are linked into more than one

match = 1;

* section, like encrypt(1) = makekey(8). Do not skip

} else

* manuals for such reasons.

match = 1;

if (warnings && !use_all && form &&

strcasecmp(msec, of->dsec))

say(of->file, "Section \"%s\" "

"manual in %s directory",

msec, of->dsec);

* Manual page directories exist for each kernel

* architecture as returned by machine(1).

* However, many manuals only depend on the

* application architecture as returned by arch(1).

* For example, some (2/ARM) manuals are shared

* across the "armish" and "zaurus" kernel

* architectures.

* A few manuals are even shared across completely

* different architectures, for example fdformat(1)

* on amd64, i386, sparc, and sparc64.

* Thus, warn about architecture mismatches,

* but don't skip manuals for this reason.

if (warnings && !use_all && strcasecmp(march, of->arch))

say(of->file, "Architecture \"%s\" "

"manual in \"%s\" directory",

march, of->arch);

putkey(of, of->name, TYPE_Nm);

if (NULL != mdoc) {

if (NULL != (cp = mdoc_meta(mdoc)->name))

putkey(of, cp, TYPE_Nm);

putkey(mpage, cp, TYPE_Nm);

parse_mdoc(of, mdoc_node(mdoc));

assert(NULL == mpage->desc);

parse_mdoc(mpage, mdoc_node(mdoc));

putkey(mpage, NULL != mpage->desc ?

mpage->desc : mpage->mlinks->name, TYPE_Nd);

} else if (NULL != man)

parse_man(of, man_node(man));

parse_man(mpage, man_node(man));

else

parse_catpage(of);

parse_cat(mpage);

dbindex(mc, form, of);

* Build a title string for the file. If it matches

* the location of the file, remember the title as

* found; else, remember it as missing.

if (check_reachable) {

if (-1 == asprintf(&title_str, "%s(%s%s%s)",

mpage->title, mpage->sec,

'\0' == *mpage->arch ? "" : "/",

mpage->arch)) {

perror(NULL);

exit((int)MANDOCLEVEL_SYSERR);

}

tslot = ohash_qlookup(&title_table, title_str);

title_entry = ohash_find(&title_table, tslot);

if (NULL == title_entry) {

title_entry = mandoc_malloc(

sizeof(struct title));

title_entry->title = title_str;

title_entry->file = mandoc_strdup(

match ? "" : mpage->mlinks->file);

ohash_insert(&title_table, tslot,

title_entry);

} else {

if (match)

*title_entry->file = '\0';

free(title_str);

}

dbindex(mpage, mc);

ohash_delete(&strings);

mpage = ohash_next(&mpages, &pslot);

}

if (check_reachable) {

title_entry = ohash_first(&title_table, &tslot);

while (NULL != title_entry) {

if ('\0' != *title_entry->file)

say(title_entry->file,

"Probably unreachable, title is %s",

title_entry->title);

free(title_entry->title);

free(title_entry->file);

free(title_entry);

title_entry = ohash_next(&title_table, &tslot);

}

ohash_delete(&title_table);

}

static void

parse_catpage(struct of *of)

parse_cat(struct mpage *mpage)

{

FILE *stream;

char *line, *p, *title;

size_t len, plen, titlesz;

if (NULL == (stream = fopen(of->file, "r"))) {

if (NULL == (stream = fopen(mpage->mlinks->file, "r"))) {

if (warnings)

say(of->file, NULL);

say(mpage->mlinks->file, NULL);

return;

}

Line 1099 parse_catpage(struct of *of)

Line 1173 parse_catpage(struct of *of)

if (NULL == title || '\0' == *title) {

if (warnings)

say(of->file, "Cannot find NAME section");

say(mpage->mlinks->file,

putkey(of, of->name, TYPE_Nd);

"Cannot find NAME section");

assert(NULL == mpage->desc);

mpage->desc = mandoc_strdup(mpage->mlinks->name);

putkey(mpage, mpage->mlinks->name, TYPE_Nd);

fclose(stream);

free(title);

return;

Line 1120 parse_catpage(struct of *of)

Line 1197 parse_catpage(struct of *of)

/* Skip to next word. */ ;

} else {

if (warnings)

say(of->file, "No dash in title line");

say(mpage->mlinks->file,

"No dash in title line");

p = title;

}

Line 1138 parse_catpage(struct of *of)

Line 1216 parse_catpage(struct of *of)

plen -= 2;

}

of->desc = stradd(p);

assert(NULL == mpage->desc);

putkey(of, p, TYPE_Nd);

mpage->desc = mandoc_strdup(p);

putkey(mpage, mpage->desc, TYPE_Nd);

fclose(stream);

free(title);

}

Line 1148 parse_catpage(struct of *of)

Line 1227 parse_catpage(struct of *of)

* Put a type/word pair into the word database for this particular file.

static void

putkey(const struct of *of, const char *value, uint64_t type)

putkey(const struct mpage *mpage, const char *value, uint64_t type)

{

assert(NULL != value);

putkeys(of, value, strlen(value), type);

putkeys(mpage, value, strlen(value), type);

}

* Grok all nodes at or below a certain mdoc node into putkey().

static void

putmdockey(const struct of *of, const struct mdoc_node *n, uint64_t m)

putmdockey(const struct mpage *mpage,

const struct mdoc_node *n, uint64_t m)

{

for ( ; NULL != n; n = n->next) {

if (NULL != n->child)

putmdockey(of, n->child, m);

putmdockey(mpage, n->child, m);

if (MDOC_TEXT == n->type)

putkey(of, n->string, m);

putkey(mpage, n->string, m);

}

static void

parse_man(struct of *of, const struct man_node *n)

parse_man(struct mpage *mpage, const struct man_node *n)

{

const struct man_node *head, *body;

char *start, *sv, *title;

Line 1266 parse_man(struct of *of, const struct man_node *n)

Line 1346 parse_man(struct of *of, const struct man_node *n)

byte = start[sz];

start[sz] = '\0';

putkey(of, start, TYPE_Nm);

putkey(mpage, start, TYPE_Nm);

if (' ' == byte) {

start += sz + 1;

Line 1280 parse_man(struct of *of, const struct man_node *n)

Line 1360 parse_man(struct of *of, const struct man_node *n)

}

if (sv == start) {

putkey(of, start, TYPE_Nm);

putkey(mpage, start, TYPE_Nm);

free(title);

return;

}

Line 1302 parse_man(struct of *of, const struct man_node *n)

Line 1382 parse_man(struct of *of, const struct man_node *n)

while (' ' == *start)

start++;

assert(NULL == of->desc);

assert(NULL == mpage->desc);

of->desc = stradd(start);

mpage->desc = mandoc_strdup(start);

putkey(of, start, TYPE_Nd);

putkey(mpage, mpage->desc, TYPE_Nd);

free(title);

return;

}

for (n = n->child; n; n = n->next)

for (n = n->child; n; n = n->next) {

parse_man(of, n);

if (NULL != mpage->desc)

break;

parse_man(mpage, n);

}

static void

parse_mdoc(struct of *of, const struct mdoc_node *n)

parse_mdoc(struct mpage *mpage, const struct mdoc_node *n)

{

assert(NULL != n);

Line 1331 parse_mdoc(struct of *of, const struct mdoc_node *n)

Line 1414 parse_mdoc(struct of *of, const struct mdoc_node *n)

/* FALLTHROUGH */

case (MDOC_TAIL):

if (NULL != mdocs[n->tok].fp)

if (0 == (*mdocs[n->tok].fp)(of, n))

if (0 == (*mdocs[n->tok].fp)(mpage, n))

break;

if (mdocs[n->tok].mask)

if (MDOCF_CHILD & mdocs[n->tok].flags)

putmdockey(mpage, n->child,

putmdockey(of, n->child, mdocs[n->tok].mask);

mdocs[n->tok].mask);

break;

default:

assert(MDOC_ROOT != n->type);

continue;

}

if (NULL != n->child)

parse_mdoc(of, n);

parse_mdoc(mpage, n);

}

static int

parse_mdoc_Fd(struct of *of, const struct mdoc_node *n)

parse_mdoc_Fd(struct mpage *mpage, const struct mdoc_node *n)

{

const char *start, *end;

size_t sz;

Line 1385 parse_mdoc_Fd(struct of *of, const struct mdoc_node *n

Line 1468 parse_mdoc_Fd(struct of *of, const struct mdoc_node *n

end--;

if (end > start)

putkeys(of, start, end - start + 1, TYPE_In);

putkeys(mpage, start, end - start + 1, TYPE_In);

return(1);

return(0);

}

static int

parse_mdoc_In(struct of *of, const struct mdoc_node *n)

parse_mdoc_Fn(struct mpage *mpage, const struct mdoc_node *n)

{

if (NULL != n->child && MDOC_TEXT == n->child->type)

return(0);

putkey(of, n->child->string, TYPE_In);

return(1);

}

static int

parse_mdoc_Fn(struct of *of, const struct mdoc_node *n)

{

const char *cp;

if (NULL == (n = n->child) || MDOC_TEXT != n->type)

Line 1421 parse_mdoc_Fn(struct of *of, const struct mdoc_node *n

Line 1493 parse_mdoc_Fn(struct of *of, const struct mdoc_node *n

while ('*' == *cp)

cp++;

putkey(of, cp, TYPE_Fn);

putkey(mpage, cp, TYPE_Fn);

if (n->string < cp)

putkeys(of, n->string, cp - n->string, TYPE_Ft);

putkeys(mpage, n->string, cp - n->string, TYPE_Ft);

for (n = n->next; NULL != n; n = n->next)

if (MDOC_TEXT == n->type)

putkey(of, n->string, TYPE_Fa);

putkey(mpage, n->string, TYPE_Fa);

return(0);

}

static int

parse_mdoc_St(struct of *of, const struct mdoc_node *n)

parse_mdoc_Xr(struct mpage *mpage, const struct mdoc_node *n)

{

char *cp;

if (NULL == n->child || MDOC_TEXT != n->child->type)

if (NULL == (n = n->child))

return(0);

putkey(of, n->child->string, TYPE_St);

if (NULL == n->next) {

return(1);

putkey(mpage, n->string, TYPE_Xr);

}

static int

parse_mdoc_Xr(struct of *of, const struct mdoc_node *n)

{

if (NULL == (n = n->child))

return(0);

}

putkey(of, n->string, TYPE_Xr);

if (-1 == asprintf(&cp, "%s(%s)", n->string, n->next->string)) {

return(1);

perror(NULL);

exit((int)MANDOCLEVEL_SYSERR);

}

putkey(mpage, cp, TYPE_Xr);

free(cp);

return(0);

}

static int

parse_mdoc_Nd(struct of *of, const struct mdoc_node *n)

parse_mdoc_Nd(struct mpage *mpage, const struct mdoc_node *n)

{

size_t sz;

char *sv, *desc;

if (MDOC_BODY != n->type)

return(0);

Line 1469 parse_mdoc_Nd(struct of *of, const struct mdoc_node *n

Line 1540 parse_mdoc_Nd(struct of *of, const struct mdoc_node *n

* into the document table.

desc = NULL;

for (n = n->child; NULL != n; n = n->next) {

if (MDOC_TEXT == n->type) {

sz = strlen(n->string) + 1;

if (NULL != mpage->desc) {

if (NULL != (sv = desc))

sz = strlen(mpage->desc) +

sz += strlen(desc) + 1;

strlen(n->string) + 2;

desc = mandoc_realloc(desc, sz);

mpage->desc = mandoc_realloc(

if (NULL != sv)

mpage->desc, sz);

strlcat(desc, " ", sz);

strlcat(mpage->desc, " ", sz);

else

strlcat(mpage->desc, n->string, sz);

*desc = '\0';

} else

strlcat(desc, n->string, sz);

mpage->desc = mandoc_strdup(n->string);

}

if (NULL != n->child)

parse_mdoc_Nd(of, n);

parse_mdoc_Nd(mpage, n);

}

of->desc = NULL != desc ? stradd(desc) : NULL;

free(desc);

return(1);

}

static int

parse_mdoc_Nm(struct of *of, const struct mdoc_node *n)

parse_mdoc_Nm(struct mpage *mpage, const struct mdoc_node *n)

{

if (SEC_NAME == n->sec)

return(SEC_NAME == n->sec ||

return(1);

(SEC_SYNOPSIS == n->sec && MDOC_HEAD == n->type));

else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)

return(0);

return(1);

}

static int

parse_mdoc_Sh(struct of *of, const struct mdoc_node *n)

parse_mdoc_Sh(struct mpage *mpage, const struct mdoc_node *n)

{

return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type);

}

static int

parse_mdoc_head(struct of *of, const struct mdoc_node *n)

parse_mdoc_head(struct mpage *mpage, const struct mdoc_node *n)

{

return(MDOC_HEAD == n->type);

}

static int

parse_mdoc_body(struct of *of, const struct mdoc_node *n)

parse_mdoc_body(struct mpage *mpage, const struct mdoc_node *n)

{

return(MDOC_BODY == n->type);

}

* See stradds().

* Add a string to the hash table for the current manual.

* Each string has a bitmask telling which macros it belongs to.

* When we finish the manual, we'll dump the table.

/*
 * Convenience form of stradds() for NUL-terminated input:
 * measures cp with strlen() and forwards both to stradds(),
 * returning whatever stradds() returns.
 */
static char *
stradd(const char *cp)
{
	size_t	 len;

	len = strlen(cp);
	return(stradds(cp, len));
}

* This looks up or adds a string to the string table.

* The string table is a table of all strings encountered during parse

* or file scan.

* In using it, we avoid having thousands of (e.g.) "cat1" string

* allocations for the "of" table.

* We also have a layer atop the string table for keeping track of words

* in a parse sequence (see putkeys()).

/*
 * Look up the sz bytes starting at cp in the global "strings" table
 * and return the key stored there.  If no entry exists yet, allocate
 * one, copy the bytes into it, insert it, and return its key.
 *
 * The key is hashed only once: the slot returned by ohash_qlookupi()
 * is used for the lookup and, on a miss, for the insertion as well.
 * Nothing touches the table between the two, so the slot stays valid.
 */
static char *
stradds(const char *cp, size_t sz)
{
	struct str	*s;
	unsigned int	 slot;
	const char	*end;

	end = cp + sz;
	slot = ohash_qlookupi(&strings, cp, &end);

	/* Already known: hand back the existing copy. */
	if (NULL != (s = ohash_find(&strings, slot)))
		return(s->key);

	/*
	 * Room for the header, the sz key bytes and one trailing byte.
	 * NOTE(review): relies on mandoc_calloc() zero-filling so that
	 * the trailing byte terminates the key -- confirm.
	 */
	s = mandoc_calloc(sizeof(struct str) + sz + 1, 1);
	memcpy(s->key, cp, sz);

	ohash_insert(&strings, slot, s);
	return(s->key);
}

/*
 * Find the entry for the sz bytes starting at cp in the global
 * "strings" table.  Returns the result of ohash_find() for the
 * computed slot; callers in this file test that result against NULL
 * to detect a missing entry.
 */
static struct str *
hashget(const char *cp, size_t sz)
{
	const char	*stop = cp + sz;
	unsigned int	 slot;

	slot = ohash_qlookupi(&strings, cp, &stop);
	return(ohash_find(&strings, slot));
}

* Add a word to the current parse sequence.

* Within the hashtable of strings, we maintain a list of strings that

* are currently indexed.

* Each of these ("words") has a bitmask modified within the parse.

* When we finish a parse, we'll dump the list, then remove the head

* entry -- since the next parse will have a new "of", it can keep track

* of its entries without conflict.

static void

putkeys(const struct of *of, const char *cp, size_t sz, uint64_t v)

putkeys(const struct mpage *mpage,

const char *cp, size_t sz, uint64_t v)

{

struct str *s;

unsigned int index;

unsigned int slot;

const char *end;

if (0 == sz)

return;

s = hashget(cp, sz);

end = cp + sz;

slot = ohash_qlookupi(&strings, cp, &end);

s = ohash_find(&strings, slot);

if (NULL != s && of == s->of) {

if (NULL != s && mpage == s->mpage) {

s->mask |= v;

return;

} else if (NULL == s) {

s = mandoc_calloc(sizeof(struct str) + sz + 1, 1);

memcpy(s->key, cp, sz);

end = cp + sz;

ohash_insert(&strings, slot, s);

index = ohash_qlookupi(&strings, cp, &end);

assert(NULL == ohash_find(&strings, index));

ohash_insert(&strings, index, s);

}

s->mpage = mpage;

s->next = words;

s->of = of;

s->mask = v;

words = s;

}

Line 1666 utf8(unsigned int cp, char out[7])

Line 1672 utf8(unsigned int cp, char out[7])

}

* Store the UTF-8 version of a key, or alias the pointer if the key has

* Store the rendered version of a key, or alias the pointer

* no UTF-8 transcription marks in it.

* if the key contains no escape sequences.

static void

utf8key(struct mchars *mc, struct str *key)

render_key(struct mchars *mc, struct str *key)

{

size_t sz, bsz, pos;

char utfbuf[7], res[5];

Line 1679 utf8key(struct mchars *mc, struct str *key)

Line 1685 utf8key(struct mchars *mc, struct str *key)

int len, u;

enum mandoc_esc esc;

assert(NULL == key->utf8);

assert(NULL == key->rendered);

res[0] = '\\';

res[1] = '\t';

Line 1695 utf8key(struct mchars *mc, struct str *key)

Line 1701 utf8key(struct mchars *mc, struct str *key)

* pointer as ourselves and get out of here.

if (strcspn(val, res) == bsz) {

key->utf8 = key->key;

key->rendered = key->key;

return;

}

Line 1730 utf8key(struct mchars *mc, struct str *key)

Line 1736 utf8key(struct mchars *mc, struct str *key)

/* Read past the slash. */

val++;

u = 0;

* Parse the escape sequence and see if it's a

* predefined character or special character.

esc = mandoc_escape

((const char **)&val, &seq, &len);

if (ESCAPE_ERROR == esc)

break;

if (ESCAPE_SPECIAL != esc)

continue;

if (0 == (u = mchars_spec2cp(mc, seq, len)))

continue;

* If we have a Unicode codepoint, try to convert that

* Render the special character

* to a UTF-8 byte string.

* as either UTF-8 or ASCII.

cpp = utfbuf;

if (0 == (sz = utf8(u, utfbuf)))

continue;

if (write_utf8) {

if (0 == (u = mchars_spec2cp(mc, seq, len)))

continue;

cpp = utfbuf;

if (0 == (sz = utf8(u, utfbuf)))

continue;

sz = strlen(cpp);

} else {

cpp = mchars_spec2str(mc, seq, len, &sz);

if (NULL == cpp)

continue;

if (ASCII_NBRSP == *cpp) {

cpp = " ";

sz = 1;

}

/* Copy the rendered glyph into the stream. */

sz = strlen(cpp);

bsz += sz;

buf = mandoc_realloc(buf, bsz);

memcpy(&buf[pos], cpp, sz);

pos += sz;

}

buf[pos] = '\0';

key->utf8 = buf;

key->rendered = buf;

}

* Flush the current page's terms (and their bits) into the database.

* Wrap the entire set of additions in a transaction to make sqlite be a

* little faster.

* Also, UTF-8-encode the description at the last possible moment.

* Also, handle escape sequences at the last possible moment.

static void

dbindex(struct mchars *mc, int form, const struct of *of)

dbindex(const struct mpage *mpage, struct mchars *mc)

{

struct mlink *mlink;

struct str *key;

const char *desc;

int64_t recno;

size_t i;

unsigned int slot;

if (verb)

say(of->file, "Adding to index");

say(mpage->mlinks->file, "Adding to index");

if (nodb)

return;

desc = "";

if (NULL != of->desc) {

if (NULL != mpage->desc && '\0' != *mpage->desc) {

key = hashget(of->desc, strlen(of->desc));

key = ohash_find(&strings,

ohash_qlookup(&strings, mpage->desc));

assert(NULL != key);

if (NULL == key->utf8)

if (NULL == key->rendered)

utf8key(mc, key);

render_key(mc, key);

desc = key->utf8;

desc = key->rendered;

}

SQL_EXEC("BEGIN TRANSACTION");

i = 1;

SQL_BIND_TEXT(stmts[STMT_INSERT_DOC], i, of->file);

SQL_BIND_TEXT(stmts[STMT_INSERT_DOC], i, of->sec);

* XXX The following three lines are obsolete

SQL_BIND_TEXT(stmts[STMT_INSERT_DOC], i, of->arch);

* and only kept for backward compatibility

SQL_BIND_TEXT(stmts[STMT_INSERT_DOC], i, desc);

* until apropos(1) and friends have caught up.

SQL_BIND_INT(stmts[STMT_INSERT_DOC], i, form);

SQL_STEP(stmts[STMT_INSERT_DOC]);

SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, mpage->mlinks->file);

SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, mpage->mlinks->dsec);

SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, mpage->mlinks->arch);

SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, desc);

SQL_BIND_INT(stmts[STMT_INSERT_PAGE], i, FORM_SRC == mpage->form);

SQL_STEP(stmts[STMT_INSERT_PAGE]);

recno = sqlite3_last_insert_rowid(db);

sqlite3_reset(stmts[STMT_INSERT_DOC]);

sqlite3_reset(stmts[STMT_INSERT_PAGE]);

for (key = words; NULL != key; key = key->next) {

for (mlink = mpage->mlinks; mlink; mlink = mlink->next) {

assert(key->of == of);

if (NULL == key->utf8)

utf8key(mc, key);

i = 1;

SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->file);

SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->dsec);

SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->arch);

SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->name);

SQL_BIND_INT64(stmts[STMT_INSERT_LINK], i, recno);

SQL_STEP(stmts[STMT_INSERT_LINK]);

sqlite3_reset(stmts[STMT_INSERT_LINK]);

}

for (key = ohash_first(&strings, &slot); NULL != key;

key = ohash_next(&strings, &slot)) {

assert(key->mpage == mpage);

if (NULL == key->rendered)

render_key(mc, key);

i = 1;

SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, key->mask);

SQL_BIND_TEXT(stmts[STMT_INSERT_KEY], i, key->utf8);

SQL_BIND_TEXT(stmts[STMT_INSERT_KEY], i, key->rendered);

SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, recno);

SQL_STEP(stmts[STMT_INSERT_KEY]);

sqlite3_reset(stmts[STMT_INSERT_KEY]);

if (key->rendered != key->key)

free(key->rendered);

free(key);

}

SQL_EXEC("END TRANSACTION");

Line 1828 dbindex(struct mchars *mc, int form, const struct of *

Line 1865 dbindex(struct mchars *mc, int form, const struct of *

static void

dbprune(void)

{

struct of *of;

struct mpage *mpage;

struct mlink *mlink;

size_t i;

unsigned int slot;

if (nodb)

return;

for (of = ofs; NULL != of; of = of->next) {

mpage = ohash_first(&mpages, &slot);

while (NULL != mpage) {

mlink = mpage->mlinks;

i = 1;

SQL_BIND_TEXT(stmts[STMT_DELETE], i, of->file);

SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE], i, mlink->file);

SQL_STEP(stmts[STMT_DELETE]);

SQL_STEP(stmts[STMT_DELETE_PAGE]);

sqlite3_reset(stmts[STMT_DELETE]);

sqlite3_reset(stmts[STMT_DELETE_PAGE]);

if (verb)

say(of->file, "Deleted from index");

say(mlink->file, "Deleted from index");

mpage = ohash_next(&mpages, &slot);

}

Line 1920 dbopen(int real)

Line 1962 dbopen(int real)

return(0);

}

sql = "CREATE TABLE \"docs\" (\n"

* XXX The first three columns in table mpages are obsolete

* and only kept for backward compatibility

* until apropos(1) and friends have caught up.

sql = "CREATE TABLE \"mpages\" (\n"

" \"file\" TEXT NOT NULL,\n"

" \"sec\" TEXT NOT NULL,\n"

" \"arch\" TEXT NOT NULL,\n"

Line 1929 dbopen(int real)

Line 1976 dbopen(int real)

" \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n"

");\n"

"\n"

"CREATE TABLE \"mlinks\" (\n"

" \"file\" TEXT NOT NULL,\n"

" \"sec\" TEXT NOT NULL,\n"

" \"arch\" TEXT NOT NULL,\n"

" \"name\" TEXT NOT NULL,\n"

" \"pageid\" INTEGER NOT NULL REFERENCES mpages(id) "

"ON DELETE CASCADE,\n"

" \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n"

");\n"

"\n"

"CREATE TABLE \"keys\" (\n"

" \"bits\" INTEGER NOT NULL,\n"

" \"key\" TEXT NOT NULL,\n"

" \"docid\" INTEGER NOT NULL REFERENCES docs(id) "

" \"pageid\" INTEGER NOT NULL REFERENCES mpages(id) "

"ON DELETE CASCADE,\n"

" \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n"

");\n"

"\n"

Line 1947 dbopen(int real)

Line 2004 dbopen(int real)

prepare_statements:

SQL_EXEC("PRAGMA foreign_keys = ON");

sql = "DELETE FROM docs where file=?";

sql = "DELETE FROM mpages where file=?";

sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_DELETE], NULL);

sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_DELETE_PAGE], NULL);

sql = "INSERT INTO docs "

sql = "INSERT INTO mpages "

"(file,sec,arch,desc,form) VALUES (?,?,?,?,?)";

sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_DOC], NULL);

sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_PAGE], NULL);

sql = "INSERT INTO mlinks "

"(file,sec,arch,name,pageid) VALUES (?,?,?,?,?)";

sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_LINK], NULL);

sql = "INSERT INTO keys "

"(bits,key,docid) VALUES (?,?,?)";

"(bits,key,pageid) VALUES (?,?,?)";

sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_KEY], NULL);

#ifndef __APPLE__

* When opening a new database, we can turn off

* synchronous mode for much better performance.

if (real)

SQL_EXEC("PRAGMA synchronous = OFF");

#endif

return(1);

}

CVSweb