version 1.49.2.9, 2013/10/10 23:43:04 |
version 1.63, 2013/06/06 15:15:07 |
|
|
/* $Id$ */ |
/* $Id$ */ |
/* |
/* |
* Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> |
* Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> |
* Copyright (c) 2011, 2012 Ingo Schwarze <schwarze@openbsd.org> |
* Copyright (c) 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org> |
* |
* |
* Permission to use, copy, modify, and distribute this software for any |
* Permission to use, copy, modify, and distribute this software for any |
* purpose with or without fee is hereby granted, provided that the above |
* purpose with or without fee is hereby granted, provided that the above |
|
|
#include "config.h" |
#include "config.h" |
#endif |
#endif |
|
|
#include <sys/types.h> |
#include <sys/stat.h> |
|
|
#include <assert.h> |
#include <assert.h> |
#include <ctype.h> |
#include <ctype.h> |
#include <dirent.h> |
|
#include <errno.h> |
#include <errno.h> |
#include <fcntl.h> |
#include <fcntl.h> |
|
#include <fts.h> |
#include <getopt.h> |
#include <getopt.h> |
#include <limits.h> |
#include <limits.h> |
|
#include <stddef.h> |
#include <stdio.h> |
#include <stdio.h> |
#include <stdint.h> |
#include <stdint.h> |
#include <stdlib.h> |
#include <stdlib.h> |
#include <string.h> |
#include <string.h> |
#include <unistd.h> |
#include <unistd.h> |
|
|
#if defined(__APPLE__) |
#ifdef HAVE_OHASH |
# include <libkern/OSByteOrder.h> |
#include <ohash.h> |
#elif defined(__linux__) |
|
# include <endian.h> |
|
#elif defined(__sun) |
|
# include <sys/byteorder.h> |
|
# include <sys/stat.h> |
|
#else |
#else |
# include <sys/endian.h> |
#include "compat_ohash.h" |
#endif |
#endif |
|
#include <sqlite3.h> |
|
|
#if defined(__linux__) || defined(__sun) |
|
# include <db_185.h> |
|
#else |
|
# include <db.h> |
|
#endif |
|
|
|
#include "man.h" |
|
#include "mdoc.h" |
#include "mdoc.h" |
|
#include "man.h" |
#include "mandoc.h" |
#include "mandoc.h" |
#include "mandocdb.h" |
|
#include "manpath.h" |
#include "manpath.h" |
|
#include "mansearch.h" |
|
|
#define MANDOC_BUFSZ BUFSIZ |
#define SQL_EXEC(_v) \ |
#define MANDOC_SLOP 1024 |
if (SQLITE_OK != sqlite3_exec(db, (_v), NULL, NULL, NULL)) \ |
|
fprintf(stderr, "%s\n", sqlite3_errmsg(db)) |
|
#define SQL_BIND_TEXT(_s, _i, _v) \ |
|
if (SQLITE_OK != sqlite3_bind_text \ |
|
((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \ |
|
fprintf(stderr, "%s\n", sqlite3_errmsg(db)) |
|
#define SQL_BIND_INT(_s, _i, _v) \ |
|
if (SQLITE_OK != sqlite3_bind_int \ |
|
((_s), (_i)++, (_v))) \ |
|
fprintf(stderr, "%s\n", sqlite3_errmsg(db)) |
|
#define SQL_BIND_INT64(_s, _i, _v) \ |
|
if (SQLITE_OK != sqlite3_bind_int64 \ |
|
((_s), (_i)++, (_v))) \ |
|
fprintf(stderr, "%s\n", sqlite3_errmsg(db)) |
|
#define SQL_STEP(_s) \ |
|
if (SQLITE_DONE != sqlite3_step((_s))) \ |
|
fprintf(stderr, "%s\n", sqlite3_errmsg(db)) |
|
|
#define MANDOC_SRC 0x1 |
enum op { |
#define MANDOC_FORM 0x2 |
OP_DEFAULT = 0, /* new dbs from dir list or default config */ |
|
OP_CONFFILE, /* new databases from custom config file */ |
|
OP_UPDATE, /* delete/add entries in existing database */ |
|
OP_DELETE, /* delete entries from existing database */ |
|
OP_TEST /* change no databases, report potential problems */ |
|
}; |
|
|
/* Access to the mandoc database on disk. */ |
enum form { |
|
FORM_SRC, /* format is -man or -mdoc */ |
|
FORM_CAT, /* format is cat */ |
|
FORM_NONE /* format is unknown */ |
|
}; |
|
|
struct mdb { |
struct str { |
char idxn[PATH_MAX]; /* index db filename */ |
char *utf8; /* key in UTF-8 form */ |
char dbn[PATH_MAX]; /* keyword db filename */ |
const struct of *of; /* if set, the owning parse */ |
DB *idx; /* index recno database */ |
struct str *next; /* next in owning parse sequence */ |
DB *db; /* keyword btree database */ |
uint64_t mask; /* bitmask in sequence */ |
|
char key[]; /* the string itself */ |
}; |
}; |
|
|
/* Stack of temporarily unused index records. */ |
struct id { |
|
ino_t ino; |
struct recs { |
dev_t dev; |
recno_t *stack; /* pointer to a malloc'ed array */ |
|
size_t size; /* number of allocated slots */ |
|
size_t cur; /* current number of empty records */ |
|
recno_t last; /* last record number in the index */ |
|
}; |
}; |
|
|
/* Tiny list for files. No need to bring in QUEUE. */ |
|
|
|
struct of { |
struct of { |
char *fname; /* heap-allocated */ |
struct id id; /* used for hashing routine */ |
char *sec; |
struct of *next; /* next in ofs */ |
char *arch; |
enum form dform; /* path-cued form */ |
char *title; |
enum form sform; /* suffix-cued form */ |
int src_form; |
char file[PATH_MAX]; /* filename rel. to manpath */ |
struct of *next; /* NULL for last one */ |
const char *desc; /* parsed description */ |
struct of *first; /* first in list */ |
const char *sec; /* suffix-cued section (or empty) */ |
|
const char *dsec; /* path-cued section (or empty) */ |
|
const char *arch; /* path-cued arch. (or empty) */ |
|
const char *name; /* name (from filename) (not empty) */ |
}; |
}; |
|
|
/* Buffer for storing growable data. */ |
enum stmt { |
|
STMT_DELETE = 0, /* delete manpage */ |
struct buf { |
STMT_INSERT_DOC, /* insert manpage */ |
char *cp; |
STMT_INSERT_KEY, /* insert parsed key */ |
size_t len; /* current length */ |
STMT__MAX |
size_t size; /* total buffer size */ |
|
}; |
}; |
|
|
/* Operation we're going to perform. */ |
typedef int (*mdoc_fp)(struct of *, const struct mdoc_node *); |
|
|
enum op { |
struct mdoc_handler { |
OP_DEFAULT = 0, /* new dbs from dir list or default config */ |
mdoc_fp fp; /* optional handler */ |
OP_CONFFILE, /* new databases from custom config file */ |
uint64_t mask; /* set unless handler returns 0 */ |
OP_UPDATE, /* delete/add entries in existing database */ |
int flags; /* for use by pmdoc_node */ |
OP_DELETE, /* delete entries from existing database */ |
#define MDOCF_CHILD 0x01 /* automatically index child nodes */ |
OP_TEST /* change no databases, report potential problems */ |
|
}; |
}; |
|
|
#define MAN_ARGS DB *hash, \ |
static void dbclose(int); |
struct buf *buf, \ |
static void dbindex(struct mchars *, int, const struct of *); |
struct buf *dbuf, \ |
static int dbopen(int); |
const struct man_node *n |
static void dbprune(void); |
#define MDOC_ARGS DB *hash, \ |
static void fileadd(struct of *); |
struct buf *buf, \ |
static int filecheck(const char *); |
struct buf *dbuf, \ |
static void filescan(const char *); |
const struct mdoc_node *n, \ |
static struct str *hashget(const char *, size_t); |
const struct mdoc_meta *m |
static void *hash_alloc(size_t, void *); |
|
static void hash_free(void *, size_t, void *); |
|
static void *hash_halloc(size_t, void *); |
|
static void inoadd(const struct stat *, struct of *); |
|
static int inocheck(const struct stat *); |
|
static void ofadd(int, const char *, const char *, const char *, |
|
const char *, const char *, const struct stat *); |
|
static void offree(void); |
|
static void ofmerge(struct mchars *, struct mparse *); |
|
static void parse_catpage(struct of *); |
|
static void parse_man(struct of *, const struct man_node *); |
|
static void parse_mdoc(struct of *, const struct mdoc_node *); |
|
static int parse_mdoc_body(struct of *, const struct mdoc_node *); |
|
static int parse_mdoc_head(struct of *, const struct mdoc_node *); |
|
static int parse_mdoc_Fd(struct of *, const struct mdoc_node *); |
|
static int parse_mdoc_Fn(struct of *, const struct mdoc_node *); |
|
static int parse_mdoc_In(struct of *, const struct mdoc_node *); |
|
static int parse_mdoc_Nd(struct of *, const struct mdoc_node *); |
|
static int parse_mdoc_Nm(struct of *, const struct mdoc_node *); |
|
static int parse_mdoc_Sh(struct of *, const struct mdoc_node *); |
|
static int parse_mdoc_St(struct of *, const struct mdoc_node *); |
|
static int parse_mdoc_Xr(struct of *, const struct mdoc_node *); |
|
static int set_basedir(const char *); |
|
static void putkey(const struct of *, |
|
const char *, uint64_t); |
|
static void putkeys(const struct of *, |
|
const char *, int, uint64_t); |
|
static void putmdockey(const struct of *, |
|
const struct mdoc_node *, uint64_t); |
|
static void say(const char *, const char *, ...); |
|
static char *stradd(const char *); |
|
static char *straddbuf(const char *, size_t); |
|
static int treescan(void); |
|
static size_t utf8(unsigned int, char [7]); |
|
static void utf8key(struct mchars *, struct str *); |
|
static void wordaddbuf(const struct of *, |
|
const char *, size_t, uint64_t); |
|
|
static void buf_appendmdoc(struct buf *, |
static char *progname; |
const struct mdoc_node *, int); |
static int use_all; /* use all found files */ |
static void buf_append(struct buf *, const char *); |
static int nodb; /* no database changes */ |
static void buf_appendb(struct buf *, |
static int verb; /* print what we're doing */ |
const void *, size_t); |
static int warnings; /* warn about crap */ |
static void dbt_put(DB *, const char *, DBT *, DBT *); |
static int exitcode; /* to be returned by main */ |
static void hash_put(DB *, const struct buf *, uint64_t); |
static enum op op; /* operational mode */ |
static void hash_reset(DB **); |
static char basedir[PATH_MAX]; /* current base directory */ |
static void index_merge(const struct of *, struct mparse *, |
static struct ohash inos; /* table of inodes/devices */ |
struct buf *, struct buf *, DB *, |
static struct ohash filenames; /* table of filenames */ |
struct mdb *, struct recs *); |
static struct ohash strings; /* table of all strings */ |
static void index_prune(const struct of *, struct mdb *, |
static struct of *ofs = NULL; /* vector of files to parse */ |
struct recs *); |
static struct str *words = NULL; /* word list in current parse */ |
static void ofile_argbuild(int, char *[], struct of **, |
static sqlite3 *db = NULL; /* current database */ |
const char *); |
static sqlite3_stmt *stmts[STMT__MAX]; /* current statements */ |
static void ofile_dirbuild(const char *, const char *, |
|
const char *, int, struct of **); |
|
static void ofile_free(struct of *); |
|
static void pformatted(DB *, struct buf *, |
|
struct buf *, const struct of *); |
|
static int pman_node(MAN_ARGS); |
|
static void pmdoc_node(MDOC_ARGS); |
|
static int pmdoc_head(MDOC_ARGS); |
|
static int pmdoc_body(MDOC_ARGS); |
|
static int pmdoc_Fd(MDOC_ARGS); |
|
static int pmdoc_In(MDOC_ARGS); |
|
static int pmdoc_Fn(MDOC_ARGS); |
|
static int pmdoc_Nd(MDOC_ARGS); |
|
static int pmdoc_Nm(MDOC_ARGS); |
|
static int pmdoc_Sh(MDOC_ARGS); |
|
static int pmdoc_St(MDOC_ARGS); |
|
static int pmdoc_Xr(MDOC_ARGS); |
|
|
|
#define MDOCF_CHILD 0x01 /* Automatically index child nodes. */ |
|
|
|
struct mdoc_handler { |
|
int (*fp)(MDOC_ARGS); /* Optional handler. */ |
|
uint64_t mask; /* Set unless handler returns 0. */ |
|
int flags; /* For use by pmdoc_node. */ |
|
}; |
|
|
|
static const struct mdoc_handler mdocs[MDOC_MAX] = { |
static const struct mdoc_handler mdocs[MDOC_MAX] = { |
{ NULL, 0, 0 }, /* Ap */ |
{ NULL, 0, 0 }, /* Ap */ |
{ NULL, 0, 0 }, /* Dd */ |
{ NULL, 0, 0 }, /* Dd */ |
{ NULL, 0, 0 }, /* Dt */ |
{ NULL, 0, 0 }, /* Dt */ |
{ NULL, 0, 0 }, /* Os */ |
{ NULL, 0, 0 }, /* Os */ |
{ pmdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */ |
{ parse_mdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */ |
{ pmdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */ |
{ parse_mdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */ |
{ NULL, 0, 0 }, /* Pp */ |
{ NULL, 0, 0 }, /* Pp */ |
{ NULL, 0, 0 }, /* D1 */ |
{ NULL, 0, 0 }, /* D1 */ |
{ NULL, 0, 0 }, /* Dl */ |
{ NULL, 0, 0 }, /* Dl */ |
Line 187 static const struct mdoc_handler mdocs[MDOC_MAX] = { |
|
Line 210 static const struct mdoc_handler mdocs[MDOC_MAX] = { |
|
{ NULL, TYPE_Ev, MDOCF_CHILD }, /* Ev */ |
{ NULL, TYPE_Ev, MDOCF_CHILD }, /* Ev */ |
{ NULL, 0, 0 }, /* Ex */ |
{ NULL, 0, 0 }, /* Ex */ |
{ NULL, TYPE_Fa, MDOCF_CHILD }, /* Fa */ |
{ NULL, TYPE_Fa, MDOCF_CHILD }, /* Fa */ |
{ pmdoc_Fd, TYPE_In, 0 }, /* Fd */ |
{ parse_mdoc_Fd, TYPE_In, 0 }, /* Fd */ |
{ NULL, TYPE_Fl, MDOCF_CHILD }, /* Fl */ |
{ NULL, TYPE_Fl, MDOCF_CHILD }, /* Fl */ |
{ pmdoc_Fn, 0, 0 }, /* Fn */ |
{ parse_mdoc_Fn, 0, 0 }, /* Fn */ |
{ NULL, TYPE_Ft, MDOCF_CHILD }, /* Ft */ |
{ NULL, TYPE_Ft, MDOCF_CHILD }, /* Ft */ |
{ NULL, TYPE_Ic, MDOCF_CHILD }, /* Ic */ |
{ NULL, TYPE_Ic, MDOCF_CHILD }, /* Ic */ |
{ pmdoc_In, TYPE_In, 0 }, /* In */ |
{ parse_mdoc_In, TYPE_In, MDOCF_CHILD }, /* In */ |
{ NULL, TYPE_Li, MDOCF_CHILD }, /* Li */ |
{ NULL, TYPE_Li, MDOCF_CHILD }, /* Li */ |
{ pmdoc_Nd, TYPE_Nd, MDOCF_CHILD }, /* Nd */ |
{ parse_mdoc_Nd, TYPE_Nd, MDOCF_CHILD }, /* Nd */ |
{ pmdoc_Nm, TYPE_Nm, MDOCF_CHILD }, /* Nm */ |
{ parse_mdoc_Nm, TYPE_Nm, MDOCF_CHILD }, /* Nm */ |
{ NULL, 0, 0 }, /* Op */ |
{ NULL, 0, 0 }, /* Op */ |
{ NULL, 0, 0 }, /* Ot */ |
{ NULL, 0, 0 }, /* Ot */ |
{ NULL, TYPE_Pa, MDOCF_CHILD }, /* Pa */ |
{ NULL, TYPE_Pa, MDOCF_CHILD }, /* Pa */ |
{ NULL, 0, 0 }, /* Rv */ |
{ NULL, 0, 0 }, /* Rv */ |
{ pmdoc_St, TYPE_St, 0 }, /* St */ |
{ parse_mdoc_St, TYPE_St, 0 }, /* St */ |
{ NULL, TYPE_Va, MDOCF_CHILD }, /* Va */ |
{ NULL, TYPE_Va, MDOCF_CHILD }, /* Va */ |
{ pmdoc_body, TYPE_Va, MDOCF_CHILD }, /* Vt */ |
{ parse_mdoc_body, TYPE_Va, MDOCF_CHILD }, /* Vt */ |
{ pmdoc_Xr, TYPE_Xr, 0 }, /* Xr */ |
{ parse_mdoc_Xr, TYPE_Xr, 0 }, /* Xr */ |
{ NULL, 0, 0 }, /* %A */ |
{ NULL, 0, 0 }, /* %A */ |
{ NULL, 0, 0 }, /* %B */ |
{ NULL, 0, 0 }, /* %B */ |
{ NULL, 0, 0 }, /* %D */ |
{ NULL, 0, 0 }, /* %D */ |
Line 259 static const struct mdoc_handler mdocs[MDOC_MAX] = { |
|
Line 282 static const struct mdoc_handler mdocs[MDOC_MAX] = { |
|
{ NULL, 0, 0 }, /* Ux */ |
{ NULL, 0, 0 }, /* Ux */ |
{ NULL, 0, 0 }, /* Xc */ |
{ NULL, 0, 0 }, /* Xc */ |
{ NULL, 0, 0 }, /* Xo */ |
{ NULL, 0, 0 }, /* Xo */ |
{ pmdoc_head, TYPE_Fn, 0 }, /* Fo */ |
{ parse_mdoc_head, TYPE_Fn, 0 }, /* Fo */ |
{ NULL, 0, 0 }, /* Fc */ |
{ NULL, 0, 0 }, /* Fc */ |
{ NULL, 0, 0 }, /* Oo */ |
{ NULL, 0, 0 }, /* Oo */ |
{ NULL, 0, 0 }, /* Oc */ |
{ NULL, 0, 0 }, /* Oc */ |
Line 287 static const struct mdoc_handler mdocs[MDOC_MAX] = { |
|
Line 310 static const struct mdoc_handler mdocs[MDOC_MAX] = { |
|
{ NULL, 0, 0 }, /* Ta */ |
{ NULL, 0, 0 }, /* Ta */ |
}; |
}; |
|
|
static const char *progname; |
|
static int use_all; /* Use all directories and files. */ |
|
static int verb; /* Output verbosity level. */ |
|
static int warnings; /* Potential problems in manuals. */ |
|
|
|
int |
int |
main(int argc, char *argv[]) |
main(int argc, char *argv[]) |
{ |
{ |
struct mparse *mp; /* parse sequence */ |
int ch, i; |
struct manpaths dirs; |
unsigned int index; |
struct mdb mdb; |
size_t j, sz; |
struct recs recs; |
const char *path_arg; |
enum op op; /* current operation */ |
struct str *s; |
const char *dir; |
struct mchars *mc; |
char *cp; |
struct manpaths dirs; |
char pbuf[PATH_MAX]; |
struct mparse *mp; |
int ch, i, flags; |
struct ohash_info ino_info, filename_info, str_info; |
DB *hash; /* temporary keyword hashtable */ |
|
BTREEINFO info; /* btree configuration */ |
|
size_t sz1, sz2, ipath; |
|
struct buf buf, /* keyword buffer */ |
|
dbuf; /* description buffer */ |
|
struct of *of; /* list of files for processing */ |
|
extern int optind; |
|
extern char *optarg; |
|
|
|
|
memset(stmts, 0, STMT__MAX * sizeof(sqlite3_stmt *)); |
|
memset(&dirs, 0, sizeof(struct manpaths)); |
|
|
|
ino_info.halloc = filename_info.halloc = |
|
str_info.halloc = hash_halloc; |
|
ino_info.hfree = filename_info.hfree = |
|
str_info.hfree = hash_free; |
|
ino_info.alloc = filename_info.alloc = |
|
str_info.alloc = hash_alloc; |
|
|
|
ino_info.key_offset = offsetof(struct of, id); |
|
filename_info.key_offset = offsetof(struct of, file); |
|
str_info.key_offset = offsetof(struct str, key); |
|
|
progname = strrchr(argv[0], '/'); |
progname = strrchr(argv[0], '/'); |
if (progname == NULL) |
if (progname == NULL) |
progname = argv[0]; |
progname = argv[0]; |
else |
else |
++progname; |
++progname; |
|
|
memset(&dirs, 0, sizeof(struct manpaths)); |
/* |
memset(&mdb, 0, sizeof(struct mdb)); |
* We accept a few different invocations. |
memset(&recs, 0, sizeof(struct recs)); |
* The CHECKOP macro makes sure that invocation styles don't |
|
* clobber each other. |
|
*/ |
|
#define CHECKOP(_op, _ch) do \ |
|
if (OP_DEFAULT != (_op)) { \ |
|
fprintf(stderr, "-%c: Conflicting option\n", (_ch)); \ |
|
goto usage; \ |
|
} while (/*CONSTCOND*/0) |
|
|
of = NULL; |
path_arg = NULL; |
mp = NULL; |
|
hash = NULL; |
|
op = OP_DEFAULT; |
op = OP_DEFAULT; |
dir = NULL; |
|
|
|
while (-1 != (ch = getopt(argc, argv, "aC:d:tu:vW"))) |
while (-1 != (ch = getopt(argc, argv, "aC:d:ntu:vW"))) |
switch (ch) { |
switch (ch) { |
case ('a'): |
case ('a'): |
use_all = 1; |
use_all = 1; |
break; |
break; |
case ('C'): |
case ('C'): |
if (op) { |
CHECKOP(op, ch); |
fprintf(stderr, |
path_arg = optarg; |
"-C: conflicting options\n"); |
|
goto usage; |
|
} |
|
dir = optarg; |
|
op = OP_CONFFILE; |
op = OP_CONFFILE; |
break; |
break; |
case ('d'): |
case ('d'): |
if (op) { |
CHECKOP(op, ch); |
fprintf(stderr, |
path_arg = optarg; |
"-d: conflicting options\n"); |
|
goto usage; |
|
} |
|
dir = optarg; |
|
op = OP_UPDATE; |
op = OP_UPDATE; |
break; |
break; |
|
case ('n'): |
|
nodb = 1; |
|
break; |
case ('t'): |
case ('t'): |
|
CHECKOP(op, ch); |
dup2(STDOUT_FILENO, STDERR_FILENO); |
dup2(STDOUT_FILENO, STDERR_FILENO); |
if (op) { |
|
fprintf(stderr, |
|
"-t: conflicting options\n"); |
|
goto usage; |
|
} |
|
op = OP_TEST; |
op = OP_TEST; |
use_all = 1; |
nodb = warnings = 1; |
warnings = 1; |
|
break; |
break; |
case ('u'): |
case ('u'): |
if (op) { |
CHECKOP(op, ch); |
fprintf(stderr, |
path_arg = optarg; |
"-u: conflicting options\n"); |
|
goto usage; |
|
} |
|
dir = optarg; |
|
op = OP_DELETE; |
op = OP_DELETE; |
break; |
break; |
case ('v'): |
case ('v'): |
Line 386 main(int argc, char *argv[]) |
|
Line 400 main(int argc, char *argv[]) |
|
argv += optind; |
argv += optind; |
|
|
if (OP_CONFFILE == op && argc > 0) { |
if (OP_CONFFILE == op && argc > 0) { |
fprintf(stderr, "-C: too many arguments\n"); |
fprintf(stderr, "-C: Too many arguments\n"); |
goto usage; |
goto usage; |
} |
} |
|
|
memset(&info, 0, sizeof(BTREEINFO)); |
exitcode = (int)MANDOCLEVEL_OK; |
info.lorder = 4321; |
mp = mparse_alloc(MPARSE_AUTO, |
info.flags = R_DUP; |
MANDOCLEVEL_FATAL, NULL, NULL, NULL); |
|
mc = mchars_alloc(); |
|
|
mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL, NULL); |
ohash_init(&strings, 6, &str_info); |
|
ohash_init(&inos, 6, &ino_info); |
|
ohash_init(&filenames, 6, &filename_info); |
|
|
memset(&buf, 0, sizeof(struct buf)); |
if (OP_UPDATE == op || OP_DELETE == op || OP_TEST == op) { |
memset(&dbuf, 0, sizeof(struct buf)); |
/* |
|
* Force processing all files. |
|
*/ |
|
use_all = 1; |
|
|
buf.size = dbuf.size = MANDOC_BUFSZ; |
/* |
|
* All of these deal with a specific directory. |
|
* Jump into that directory then collect files specified |
|
* on the command-line. |
|
*/ |
|
if (0 == set_basedir(path_arg)) |
|
goto out; |
|
for (i = 0; i < argc; i++) |
|
filescan(argv[i]); |
|
if (0 == dbopen(1)) |
|
goto out; |
|
if (OP_TEST != op) |
|
dbprune(); |
|
if (OP_DELETE != op) |
|
ofmerge(mc, mp); |
|
dbclose(1); |
|
} else { |
|
/* |
|
* If we have arguments, use them as our manpaths. |
|
* If we don't, grok from manpath(1) or however else |
|
* manpath_parse() wants to do it. |
|
*/ |
|
if (argc > 0) { |
|
dirs.paths = mandoc_calloc |
|
(argc, sizeof(char *)); |
|
dirs.sz = (size_t)argc; |
|
for (i = 0; i < argc; i++) |
|
dirs.paths[i] = mandoc_strdup(argv[i]); |
|
} else |
|
manpath_parse(&dirs, path_arg, NULL, NULL); |
|
|
buf.cp = mandoc_malloc(buf.size); |
/* |
dbuf.cp = mandoc_malloc(dbuf.size); |
* First scan the tree rooted at a base directory. |
|
* Then whack its database (if one exists), parse, and |
|
* build up the database. |
|
* Ignore zero-length directories and strip trailing |
|
* slashes. |
|
*/ |
|
for (j = 0; j < dirs.sz; j++) { |
|
sz = strlen(dirs.paths[j]); |
|
if (sz && '/' == dirs.paths[j][sz - 1]) |
|
dirs.paths[j][--sz] = '\0'; |
|
if (0 == sz) |
|
continue; |
|
if (0 == set_basedir(dirs.paths[j])) |
|
goto out; |
|
if (0 == treescan()) |
|
goto out; |
|
if (0 == set_basedir(dirs.paths[j])) |
|
goto out; |
|
if (0 == dbopen(0)) |
|
goto out; |
|
|
if (OP_TEST == op) { |
/* |
ofile_argbuild(argc, argv, &of, NULL); |
* Since we're opening up a new database, we can |
if (NULL == of) |
* turn off synchronous mode for much better |
goto out; |
* performance. |
index_merge(of, mp, &dbuf, &buf, hash, &mdb, &recs); |
*/ |
goto out; |
#ifndef __APPLE__ |
} |
SQL_EXEC("PRAGMA synchronous = OFF"); |
|
#endif |
|
|
if (OP_UPDATE == op || OP_DELETE == op) { |
ofmerge(mc, mp); |
if (NULL == realpath(dir, pbuf)) { |
dbclose(0); |
perror(dir); |
offree(); |
exit((int)MANDOCLEVEL_BADARG); |
ohash_delete(&inos); |
|
ohash_init(&inos, 6, &ino_info); |
|
ohash_delete(&filenames); |
|
ohash_init(&filenames, 6, &filename_info); |
} |
} |
if (strlcat(pbuf, "/", PATH_MAX) >= PATH_MAX) { |
} |
fprintf(stderr, "%s: path too long\n", pbuf); |
out: |
exit((int)MANDOCLEVEL_BADARG); |
set_basedir(NULL); |
} |
manpath_free(&dirs); |
|
mchars_free(mc); |
|
mparse_free(mp); |
|
for (s = ohash_first(&strings, &index); |
|
NULL != s; s = ohash_next(&strings, &index)) { |
|
if (s->utf8 != s->key) |
|
free(s->utf8); |
|
free(s); |
|
} |
|
ohash_delete(&strings); |
|
ohash_delete(&inos); |
|
ohash_delete(&filenames); |
|
offree(); |
|
return(exitcode); |
|
usage: |
|
fprintf(stderr, "usage: %s [-anvW] [-C file]\n" |
|
" %s [-anvW] dir ...\n" |
|
" %s [-nvW] -d dir [file ...]\n" |
|
" %s [-nvW] -u dir [file ...]\n" |
|
" %s -t file ...\n", |
|
progname, progname, progname, |
|
progname, progname); |
|
|
strlcat(mdb.dbn, pbuf, PATH_MAX); |
return((int)MANDOCLEVEL_BADARG); |
sz1 = strlcat(mdb.dbn, MANDOC_DB, PATH_MAX); |
} |
|
|
strlcat(mdb.idxn, pbuf, PATH_MAX); |
/* |
sz2 = strlcat(mdb.idxn, MANDOC_IDX, PATH_MAX); |
* Scan a directory tree rooted at "basedir" for manpages. |
|
* We use fts(), scanning directory parts along the way for clues to our |
|
* section and architecture. |
|
* |
|
* If use_all has been specified, grok all files. |
|
* If not, sanitise paths to the following: |
|
* |
|
* [./]man*[/<arch>]/<name>.<section> |
|
* or |
|
* [./]cat<section>[/<arch>]/<name>.0 |
|
* |
|
* TODO: accommodate multi-language directories. |
|
*/ |
|
static int |
|
treescan(void) |
|
{ |
|
FTS *f; |
|
FTSENT *ff; |
|
int dform; |
|
char *sec; |
|
const char *dsec, *arch, *cp, *name, *path; |
|
const char *argv[2]; |
|
|
if (sz1 >= PATH_MAX || sz2 >= PATH_MAX) { |
argv[0] = "."; |
fprintf(stderr, "%s: path too long\n", mdb.idxn); |
argv[1] = (char *)NULL; |
exit((int)MANDOCLEVEL_BADARG); |
|
|
/* |
|
* Walk through all components under the directory, using the |
|
* logical descent of files. |
|
*/ |
|
f = fts_open((char * const *)argv, FTS_LOGICAL, NULL); |
|
if (NULL == f) { |
|
exitcode = (int)MANDOCLEVEL_SYSERR; |
|
say("", NULL); |
|
return(0); |
|
} |
|
|
|
dsec = arch = NULL; |
|
dform = FORM_NONE; |
|
|
|
while (NULL != (ff = fts_read(f))) { |
|
path = ff->fts_path + 2; |
|
/* |
|
* If we're a regular file, add an "of" by using the |
|
* stored directory data and handling the filename. |
|
* Disallow duplicate (hard-linked) files. |
|
*/ |
|
if (FTS_F == ff->fts_info) { |
|
if (0 == strcmp(path, MANDOC_DB)) |
|
continue; |
|
if ( ! use_all && ff->fts_level < 2) { |
|
if (warnings) |
|
say(path, "Extraneous file"); |
|
continue; |
|
} else if (inocheck(ff->fts_statp)) { |
|
if (warnings) |
|
say(path, "Duplicate file"); |
|
continue; |
|
} else if (NULL == (sec = |
|
strrchr(ff->fts_name, '.'))) { |
|
if ( ! use_all) { |
|
if (warnings) |
|
say(path, |
|
"No filename suffix"); |
|
continue; |
|
} |
|
} else if (0 == strcmp(++sec, "html")) { |
|
if (warnings) |
|
say(path, "Skip html"); |
|
continue; |
|
} else if (0 == strcmp(sec, "gz")) { |
|
if (warnings) |
|
say(path, "Skip gz"); |
|
continue; |
|
} else if (0 == strcmp(sec, "ps")) { |
|
if (warnings) |
|
say(path, "Skip ps"); |
|
continue; |
|
} else if (0 == strcmp(sec, "pdf")) { |
|
if (warnings) |
|
say(path, "Skip pdf"); |
|
continue; |
|
} else if ( ! use_all && |
|
((FORM_SRC == dform && strcmp(sec, dsec)) || |
|
(FORM_CAT == dform && strcmp(sec, "0")))) { |
|
if (warnings) |
|
say(path, "Wrong filename suffix"); |
|
continue; |
|
} else { |
|
sec[-1] = '\0'; |
|
sec = stradd(sec); |
|
} |
|
name = stradd(ff->fts_name); |
|
ofadd(dform, path, |
|
name, dsec, sec, arch, ff->fts_statp); |
|
continue; |
|
} else if (FTS_D != ff->fts_info && |
|
FTS_DP != ff->fts_info) { |
|
if (warnings) |
|
say(path, "Not a regular file"); |
|
continue; |
} |
} |
|
|
flags = O_CREAT | O_RDWR; |
switch (ff->fts_level) { |
mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info); |
case (0): |
mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL); |
/* Ignore the root directory. */ |
|
break; |
|
case (1): |
|
/* |
|
* This might contain manX/ or catX/. |
|
* Try to infer this from the name. |
|
* If we're not in use_all, enforce it. |
|
*/ |
|
dsec = NULL; |
|
dform = FORM_NONE; |
|
cp = ff->fts_name; |
|
if (FTS_DP == ff->fts_info) |
|
break; |
|
|
if (NULL == mdb.db) { |
if (0 == strncmp(cp, "man", 3)) { |
perror(mdb.dbn); |
dform = FORM_SRC; |
exit((int)MANDOCLEVEL_SYSERR); |
dsec = stradd(cp + 3); |
} else if (NULL == mdb.idx) { |
} else if (0 == strncmp(cp, "cat", 3)) { |
perror(mdb.idxn); |
dform = FORM_CAT; |
exit((int)MANDOCLEVEL_SYSERR); |
dsec = stradd(cp + 3); |
|
} |
|
|
|
if (NULL != dsec || use_all) |
|
break; |
|
|
|
if (warnings) |
|
say(path, "Unknown directory part"); |
|
fts_set(f, ff, FTS_SKIP); |
|
break; |
|
case (2): |
|
/* |
|
* Possibly our architecture. |
|
* If we're descending, keep tabs on it. |
|
*/ |
|
arch = NULL; |
|
if (FTS_DP != ff->fts_info && NULL != dsec) |
|
arch = stradd(ff->fts_name); |
|
break; |
|
default: |
|
if (FTS_DP == ff->fts_info || use_all) |
|
break; |
|
if (warnings) |
|
say(path, "Extraneous directory part"); |
|
fts_set(f, ff, FTS_SKIP); |
|
break; |
} |
} |
|
} |
|
|
ofile_argbuild(argc, argv, &of, pbuf); |
fts_close(f); |
|
return(1); |
|
} |
|
|
if (NULL == of) |
/* |
goto out; |
* Add a file to the file vector. |
|
* Do not verify that it's a "valid" looking manpage (we'll do that |
|
* later). |
|
* |
|
* Try to infer the manual section, architecture, and page name from the |
|
* path, assuming it looks like |
|
* |
|
* [./]man*[/<arch>]/<name>.<section> |
|
* or |
|
* [./]cat<section>[/<arch>]/<name>.0 |
|
* |
|
* Stuff this information directly into the "of" vector. |
|
* See treescan() for the fts(3) version of this. |
|
*/ |
|
static void |
|
filescan(const char *file) |
|
{ |
|
char buf[PATH_MAX]; |
|
const char *sec, *arch, *name, *dsec; |
|
char *p, *start; |
|
int dform; |
|
struct stat st; |
|
|
index_prune(of, &mdb, &recs); |
assert(use_all); |
|
|
/* |
if (0 == strncmp(file, "./", 2)) |
* Go to the root of the respective manual tree. |
file += 2; |
* This must work or no manuals may be found (they're |
|
* indexed relative to the root). |
|
*/ |
|
|
|
if (OP_UPDATE == op) { |
if (NULL == realpath(file, buf)) { |
if (-1 == chdir(dir)) { |
exitcode = (int)MANDOCLEVEL_BADARG; |
perror(dir); |
say(file, NULL); |
exit((int)MANDOCLEVEL_SYSERR); |
return; |
} |
} else if (strstr(buf, basedir) != buf) { |
index_merge(of, mp, &dbuf, &buf, hash, |
exitcode = (int)MANDOCLEVEL_BADARG; |
&mdb, &recs); |
say("", "%s: outside base directory", buf); |
|
return; |
|
} else if (-1 == stat(buf, &st)) { |
|
exitcode = (int)MANDOCLEVEL_BADARG; |
|
say(file, NULL); |
|
return; |
|
} else if ( ! (S_IFREG & st.st_mode)) { |
|
exitcode = (int)MANDOCLEVEL_BADARG; |
|
say(file, "Not a regular file"); |
|
return; |
|
} else if (inocheck(&st)) { |
|
if (warnings) |
|
say(file, "Duplicate file"); |
|
return; |
|
} |
|
start = buf + strlen(basedir); |
|
sec = arch = name = dsec = NULL; |
|
dform = FORM_NONE; |
|
|
|
/* |
|
* First try to guess our directory structure. |
|
* If we find a separator, try to look for man* or cat*. |
|
* If we find one of these and what's underneath is a directory, |
|
* assume it's an architecture. |
|
*/ |
|
if (NULL != (p = strchr(start, '/'))) { |
|
*p++ = '\0'; |
|
if (0 == strncmp(start, "man", 3)) { |
|
dform = FORM_SRC; |
|
dsec = start + 3; |
|
} else if (0 == strncmp(start, "cat", 3)) { |
|
dform = FORM_CAT; |
|
dsec = start + 3; |
} |
} |
|
|
goto out; |
start = p; |
|
if (NULL != dsec && NULL != (p = strchr(start, '/'))) { |
|
*p++ = '\0'; |
|
arch = start; |
|
start = p; |
|
} |
} |
} |
|
|
/* |
/* |
* Configure the directories we're going to scan. |
* Now check the file suffix. |
* If we have command-line arguments, use them. |
* Suffix of `.0' indicates a catpage, `.1-9' is a manpage. |
* If not, we use man(1)'s method (see mandocdb.8). |
|
*/ |
*/ |
|
p = strrchr(start, '\0'); |
|
while (p-- > start && '/' != *p && '.' != *p) |
|
/* Loop. */ ; |
|
|
if (argc > 0) { |
if ('.' == *p) { |
dirs.paths = mandoc_calloc(argc, sizeof(char *)); |
*p++ = '\0'; |
dirs.sz = argc; |
sec = p; |
for (i = 0; i < argc; i++) { |
} |
if (NULL == (cp = realpath(argv[i], pbuf))) { |
|
perror(argv[i]); |
|
goto out; |
|
} |
|
dirs.paths[i] = mandoc_strdup(cp); |
|
} |
|
} else |
|
manpath_parse(&dirs, dir, NULL, NULL); |
|
|
|
for (ipath = 0; ipath < dirs.sz; ipath++) { |
/* |
|
* Now try to parse the name. |
|
* Use the filename portion of the path. |
|
*/ |
|
name = start; |
|
if (NULL != (p = strrchr(start, '/'))) { |
|
name = p + 1; |
|
*p = '\0'; |
|
} |
|
|
/* |
ofadd(dform, file, name, dsec, sec, arch, &st); |
* Go to the root of the respective manual tree. |
} |
* This must work or no manuals may be found: |
|
* They are indexed relative to the root. |
|
*/ |
|
|
|
if (-1 == chdir(dirs.paths[ipath])) { |
/* |
perror(dirs.paths[ipath]); |
* See fileadd(). |
exit((int)MANDOCLEVEL_SYSERR); |
*/ |
} |
static int |
|
filecheck(const char *name) |
|
{ |
|
unsigned int index; |
|
|
/* Create a new database in two temporary files. */ |
index = ohash_qlookup(&filenames, name); |
|
return(NULL != ohash_find(&filenames, index)); |
|
} |
|
|
flags = O_CREAT | O_EXCL | O_RDWR; |
/* |
while (NULL == mdb.db) { |
* Use the standard hashing mechanism (K&R) to see if the given filename |
strlcpy(mdb.dbn, MANDOC_DB, PATH_MAX); |
* already exists. |
strlcat(mdb.dbn, ".XXXXXXXXXX", PATH_MAX); |
*/ |
if (NULL == mktemp(mdb.dbn)) { |
static void |
perror(mdb.dbn); |
fileadd(struct of *of) |
exit((int)MANDOCLEVEL_SYSERR); |
{ |
} |
unsigned int index; |
mdb.db = dbopen(mdb.dbn, flags, 0644, |
|
DB_BTREE, &info); |
|
if (NULL == mdb.db && EEXIST != errno) { |
|
perror(mdb.dbn); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
|
} |
|
while (NULL == mdb.idx) { |
|
strlcpy(mdb.idxn, MANDOC_IDX, PATH_MAX); |
|
strlcat(mdb.idxn, ".XXXXXXXXXX", PATH_MAX); |
|
if (NULL == mktemp(mdb.idxn)) { |
|
perror(mdb.idxn); |
|
unlink(mdb.dbn); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
|
mdb.idx = dbopen(mdb.idxn, flags, 0644, |
|
DB_RECNO, NULL); |
|
if (NULL == mdb.idx && EEXIST != errno) { |
|
perror(mdb.idxn); |
|
unlink(mdb.dbn); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
|
} |
|
|
|
/* |
index = ohash_qlookup(&filenames, of->file); |
* Search for manuals and fill the new database. |
assert(NULL == ohash_find(&filenames, index)); |
*/ |
ohash_insert(&filenames, index, of); |
|
} |
|
|
ofile_dirbuild(".", "", "", 0, &of); |
/* |
|
* See inoadd(). |
|
*/ |
|
static int |
|
inocheck(const struct stat *st) |
|
{ |
|
struct id id; |
|
uint32_t hash; |
|
unsigned int index; |
|
|
if (NULL != of) { |
memset(&id, 0, sizeof(id)); |
index_merge(of, mp, &dbuf, &buf, hash, |
id.ino = hash = st->st_ino; |
&mdb, &recs); |
id.dev = st->st_dev; |
ofile_free(of); |
index = ohash_lookup_memory |
of = NULL; |
(&inos, (char *)&id, sizeof(id), hash); |
} |
|
|
|
(*mdb.db->close)(mdb.db); |
return(NULL != ohash_find(&inos, index)); |
(*mdb.idx->close)(mdb.idx); |
} |
mdb.db = NULL; |
|
mdb.idx = NULL; |
|
|
|
/* |
/* |
* Replace the old database with the new one. |
* The hashing function used here is quite simple: simply take the inode |
* This is not perfectly atomic, |
* and use uint32_t of its bits. |
* but i cannot think of a better way. |
* Then when we do the lookup, use both the inode and device identifier. |
*/ |
*/ |
|
static void |
|
inoadd(const struct stat *st, struct of *of) |
|
{ |
|
uint32_t hash; |
|
unsigned int index; |
|
|
if (-1 == rename(mdb.dbn, MANDOC_DB)) { |
of->id.ino = hash = st->st_ino; |
perror(MANDOC_DB); |
of->id.dev = st->st_dev; |
unlink(mdb.dbn); |
index = ohash_lookup_memory |
unlink(mdb.idxn); |
(&inos, (char *)&of->id, sizeof(of->id), hash); |
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
|
if (-1 == rename(mdb.idxn, MANDOC_IDX)) { |
|
perror(MANDOC_IDX); |
|
unlink(MANDOC_DB); |
|
unlink(MANDOC_IDX); |
|
unlink(mdb.idxn); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
|
} |
|
|
|
out: |
assert(NULL == ohash_find(&inos, index)); |
if (mdb.db) |
ohash_insert(&inos, index, of); |
(*mdb.db->close)(mdb.db); |
} |
if (mdb.idx) |
|
(*mdb.idx->close)(mdb.idx); |
|
if (hash) |
|
(*hash->close)(hash); |
|
if (mp) |
|
mparse_free(mp); |
|
|
|
manpath_free(&dirs); |
static void |
ofile_free(of); |
ofadd(int dform, const char *file, const char *name, const char *dsec, |
free(buf.cp); |
const char *sec, const char *arch, const struct stat *st) |
free(dbuf.cp); |
{ |
free(recs.stack); |
struct of *of; |
|
int sform; |
|
|
return(MANDOCLEVEL_OK); |
assert(NULL != file); |
|
|
usage: |
if (NULL == name) |
fprintf(stderr, |
name = ""; |
"usage: %s [-avvv] [-C file] | dir ... | -t file ...\n" |
if (NULL == sec) |
" -d dir [file ...] | " |
sec = ""; |
"-u dir [file ...]\n", |
if (NULL == dsec) |
progname); |
dsec = ""; |
|
if (NULL == arch) |
|
arch = ""; |
|
|
return((int)MANDOCLEVEL_BADARG); |
sform = FORM_NONE; |
|
if (NULL != sec && *sec <= '9' && *sec >= '1') |
|
sform = FORM_SRC; |
|
else if (NULL != sec && *sec == '0') { |
|
sec = dsec; |
|
sform = FORM_CAT; |
|
} |
|
|
|
of = mandoc_calloc(1, sizeof(struct of)); |
|
strlcpy(of->file, file, PATH_MAX); |
|
of->name = name; |
|
of->sec = sec; |
|
of->dsec = dsec; |
|
of->arch = arch; |
|
of->sform = sform; |
|
of->dform = dform; |
|
of->next = ofs; |
|
ofs = of; |
|
|
|
/* |
|
* Add to unique identifier hash. |
|
* Then if it's a source manual and we're going to use source in |
|
* favour of catpages, add it to that hash. |
|
*/ |
|
inoadd(st, of); |
|
fileadd(of); |
} |
} |
|
|
void |
static void |
index_merge(const struct of *of, struct mparse *mp, |
offree(void) |
struct buf *dbuf, struct buf *buf, DB *hash, |
|
struct mdb *mdb, struct recs *recs) |
|
{ |
{ |
recno_t rec; |
struct of *of; |
int ch, skip; |
|
DBT key, val; |
|
DB *files; /* temporary file name table */ |
|
struct mdoc *mdoc; |
|
struct man *man; |
|
const char *fn, *msec, *march, *mtitle; |
|
char *p; |
|
uint64_t mask; |
|
size_t sv; |
|
unsigned seq; |
|
uint64_t vbuf[2]; |
|
char type; |
|
|
|
static char emptystring[] = ""; |
while (NULL != (of = ofs)) { |
|
ofs = of->next; |
if (warnings) { |
free(of); |
files = NULL; |
|
hash_reset(&files); |
|
} |
} |
|
} |
|
|
rec = 0; |
/* |
for (of = of->first; of; of = of->next) { |
* Run through the files in the global vector "ofs" and add them to the |
fn = of->fname; |
* database specified in "basedir". |
|
* |
|
* This handles the parsing scheme itself, using the cues of directory |
|
* and filename to determine whether the file is parsable or not. |
|
*/ |
|
static void |
|
ofmerge(struct mchars *mc, struct mparse *mp) |
|
{ |
|
int form; |
|
size_t sz; |
|
struct mdoc *mdoc; |
|
struct man *man; |
|
char buf[PATH_MAX]; |
|
char *bufp; |
|
const char *msec, *march, *mtitle, *cp; |
|
struct of *of; |
|
enum mandoclevel lvl; |
|
|
|
for (of = ofs; NULL != of; of = of->next) { |
/* |
/* |
* Try interpreting the file as mdoc(7) or man(7) |
* If we're a catpage (as defined by our path), then see |
* source code, unless it is already known to be |
* if a manpage exists by the same name (ignoring the |
* formatted. Fall back to formatted mode. |
* suffix). |
|
* If it does, then we want to use it instead of our |
|
* own. |
*/ |
*/ |
|
if ( ! use_all && FORM_CAT == of->dform) { |
|
sz = strlcpy(buf, of->file, PATH_MAX); |
|
if (sz >= PATH_MAX) { |
|
if (warnings) |
|
say(of->file, "Filename too long"); |
|
continue; |
|
} |
|
bufp = strstr(buf, "cat"); |
|
assert(NULL != bufp); |
|
memcpy(bufp, "man", 3); |
|
if (NULL != (bufp = strrchr(buf, '.'))) |
|
*++bufp = '\0'; |
|
strlcat(buf, of->dsec, PATH_MAX); |
|
if (filecheck(buf)) { |
|
if (warnings) |
|
say(of->file, "Man " |
|
"source exists: %s", buf); |
|
continue; |
|
} |
|
} |
|
|
|
words = NULL; |
mparse_reset(mp); |
mparse_reset(mp); |
mdoc = NULL; |
mdoc = NULL; |
man = NULL; |
man = NULL; |
|
form = 0; |
|
msec = of->dsec; |
|
march = of->arch; |
|
mtitle = of->name; |
|
|
if ((MANDOC_SRC & of->src_form || |
/* |
! (MANDOC_FORM & of->src_form)) && |
* Try interpreting the file as mdoc(7) or man(7) |
MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn)) |
* source code, unless it is already known to be |
mparse_result(mp, &mdoc, &man); |
* formatted. Fall back to formatted mode. |
|
*/ |
|
if (FORM_SRC == of->dform || FORM_SRC == of->sform) { |
|
lvl = mparse_readfd(mp, -1, of->file); |
|
if (lvl < MANDOCLEVEL_FATAL) |
|
mparse_result(mp, &mdoc, &man); |
|
} |
|
|
if (NULL != mdoc) { |
if (NULL != mdoc) { |
|
form = 1; |
msec = mdoc_meta(mdoc)->msec; |
msec = mdoc_meta(mdoc)->msec; |
march = mdoc_meta(mdoc)->arch; |
march = mdoc_meta(mdoc)->arch; |
if (NULL == march) |
|
march = ""; |
|
mtitle = mdoc_meta(mdoc)->title; |
mtitle = mdoc_meta(mdoc)->title; |
} else if (NULL != man) { |
} else if (NULL != man) { |
|
form = 1; |
msec = man_meta(man)->msec; |
msec = man_meta(man)->msec; |
march = ""; |
march = ""; |
mtitle = man_meta(man)->title; |
mtitle = man_meta(man)->title; |
} else { |
} |
msec = of->sec; |
|
march = of->arch; |
|
mtitle = of->title; |
|
} |
|
|
|
|
if (NULL == msec) |
|
msec = ""; |
|
if (NULL == march) |
|
march = ""; |
|
if (NULL == mtitle) |
|
mtitle = ""; |
|
|
/* |
/* |
* Check whether the manual section given in a file |
* Check whether the manual section given in a file |
* agrees with the directory where the file is located. |
* agrees with the directory where the file is located. |
Line 673 index_merge(const struct of *of, struct mparse *mp, |
|
Line 999 index_merge(const struct of *of, struct mparse *mp, |
|
* section, like encrypt(1) = makekey(8). Do not skip |
* section, like encrypt(1) = makekey(8). Do not skip |
* manuals for such reasons. |
* manuals for such reasons. |
*/ |
*/ |
|
if (warnings && !use_all && form && |
|
strcasecmp(msec, of->dsec)) |
|
say(of->file, "Section \"%s\" " |
|
"manual in %s directory", |
|
msec, of->dsec); |
|
|
skip = 0; |
|
assert(of->sec); |
|
assert(msec); |
|
if (warnings) |
|
if (strcasecmp(msec, of->sec)) |
|
fprintf(stderr, "%s: " |
|
"section \"%s\" manual " |
|
"in \"%s\" directory\n", |
|
fn, msec, of->sec); |
|
|
|
/* |
/* |
* Manual page directories exist for each kernel |
* Manual page directories exist for each kernel |
* architecture as returned by machine(1). |
* architecture as returned by machine(1). |
Line 698 index_merge(const struct of *of, struct mparse *mp, |
|
Line 1019 index_merge(const struct of *of, struct mparse *mp, |
|
* Thus, warn about architecture mismatches, |
* Thus, warn about architecture mismatches, |
* but don't skip manuals for this reason. |
* but don't skip manuals for this reason. |
*/ |
*/ |
|
if (warnings && !use_all && strcasecmp(march, of->arch)) |
|
say(of->file, "Architecture \"%s\" " |
|
"manual in \"%s\" directory", |
|
march, of->arch); |
|
|
assert(of->arch); |
putkey(of, of->name, TYPE_Nm); |
assert(march); |
|
if (warnings) |
|
if (strcasecmp(march, of->arch)) |
|
fprintf(stderr, "%s: " |
|
"architecture \"%s\" manual " |
|
"in \"%s\" directory\n", |
|
fn, march, of->arch); |
|
|
|
/* |
if (NULL != mdoc) { |
* By default, skip a file if the title given |
if (NULL != (cp = mdoc_meta(mdoc)->name)) |
* in the file disagrees with the file name. |
putkey(of, cp, TYPE_Nm); |
* Do not warn, this happens for all MLINKs. |
parse_mdoc(of, mdoc_node(mdoc)); |
*/ |
} else if (NULL != man) |
|
parse_man(of, man_node(man)); |
|
else |
|
parse_catpage(of); |
|
|
assert(of->title); |
dbindex(mc, form, of); |
assert(mtitle); |
} |
if (strcasecmp(mtitle, of->title)) |
} |
skip = 1; |
|
|
|
/* |
static void |
* Build a title string for the file. If it matches |
parse_catpage(struct of *of) |
* the location of the file, remember the title as |
{ |
* found; else, remember it as missing. |
FILE *stream; |
*/ |
char *line, *p, *title; |
|
size_t len, plen, titlesz; |
|
|
if (warnings) { |
if (NULL == (stream = fopen(of->file, "r"))) { |
buf->len = 0; |
if (warnings) |
buf_appendb(buf, mtitle, strlen(mtitle)); |
say(of->file, NULL); |
buf_appendb(buf, "(", 1); |
return; |
buf_appendb(buf, msec, strlen(msec)); |
} |
if ('\0' != *march) { |
|
buf_appendb(buf, "/", 1); |
|
buf_appendb(buf, march, strlen(march)); |
|
} |
|
buf_appendb(buf, ")", 2); |
|
for (p = buf->cp; '\0' != *p; p++) |
|
*p = tolower(*p); |
|
key.data = buf->cp; |
|
key.size = buf->len; |
|
val.data = NULL; |
|
val.size = 0; |
|
if (0 == skip) |
|
val.data = emptystring; |
|
else { |
|
ch = (*files->get)(files, &key, &val, 0); |
|
if (ch < 0) { |
|
perror("hash"); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} else if (ch > 0) { |
|
val.data = (void *)fn; |
|
val.size = strlen(fn) + 1; |
|
} else |
|
val.data = NULL; |
|
} |
|
if (NULL != val.data && |
|
(*files->put)(files, &key, &val, 0) < 0) { |
|
perror("hash"); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
|
} |
|
|
|
if (skip && !use_all) |
/* Skip to first blank line. */ |
continue; |
|
|
|
/* |
while (NULL != (line = fgetln(stream, &len))) |
* The index record value consists of a nil-terminated |
if ('\n' == *line) |
* filename, a nil-terminated manual section, and a |
break; |
* nil-terminated description. Use the actual |
|
* location of the file, such that the user can find |
|
* it with man(1). Since the description may not be |
|
* set, we set a sentinel to see if we're going to |
|
* write a nil byte in its place. |
|
*/ |
|
|
|
dbuf->len = 0; |
/* |
type = mdoc ? 'd' : (man ? 'a' : 'c'); |
* Assume the first line that is not indented |
buf_appendb(dbuf, &type, 1); |
* is the first section header. Skip to it. |
buf_appendb(dbuf, fn, strlen(fn) + 1); |
*/ |
buf_appendb(dbuf, of->sec, strlen(of->sec) + 1); |
|
buf_appendb(dbuf, of->title, strlen(of->title) + 1); |
|
buf_appendb(dbuf, of->arch, strlen(of->arch) + 1); |
|
|
|
sv = dbuf->len; |
while (NULL != (line = fgetln(stream, &len))) |
|
if ('\n' != *line && ' ' != *line) |
|
break; |
|
|
|
/* |
|
* Read up until the next section into a buffer. |
|
* Strip the leading and trailing newline from each read line, |
|
* appending a trailing space. |
|
* Ignore empty (whitespace-only) lines. |
|
*/ |
|
|
/* |
titlesz = 0; |
* Collect keyword/mask pairs. |
title = NULL; |
* Each pair will become a new btree node. |
|
*/ |
|
|
|
hash_reset(&hash); |
while (NULL != (line = fgetln(stream, &len))) { |
if (mdoc) |
if (' ' != *line || '\n' != line[len - 1]) |
pmdoc_node(hash, buf, dbuf, |
break; |
mdoc_node(mdoc), mdoc_meta(mdoc)); |
while (len > 0 && isspace((unsigned char)*line)) { |
else if (man) |
line++; |
pman_node(hash, buf, dbuf, man_node(man)); |
len--; |
else |
} |
pformatted(hash, buf, dbuf, of); |
if (1 == len) |
|
|
/* Test mode, do not access any database. */ |
|
|
|
if (NULL == mdb->db || NULL == mdb->idx) |
|
continue; |
continue; |
|
title = mandoc_realloc(title, titlesz + len); |
|
memcpy(title + titlesz, line, len); |
|
titlesz += len; |
|
title[titlesz - 1] = ' '; |
|
} |
|
|
/* |
/* |
* Make sure the file name is always registered |
* If no page content can be found, or the input line |
* as an .Nm search key. |
* is already the next section header, or there is no |
*/ |
* trailing newline, reuse the page title as the page |
buf->len = 0; |
* description. |
buf_append(buf, of->title); |
*/ |
hash_put(hash, buf, TYPE_Nm); |
|
|
|
/* |
if (NULL == title || '\0' == *title) { |
* Reclaim an empty index record, if available. |
if (warnings) |
* Use its record number for all new btree nodes. |
say(of->file, "Cannot find NAME section"); |
*/ |
putkey(of, of->name, TYPE_Nd); |
|
fclose(stream); |
|
free(title); |
|
return; |
|
} |
|
|
if (recs->cur > 0) { |
title = mandoc_realloc(title, titlesz + 1); |
recs->cur--; |
title[titlesz] = '\0'; |
rec = recs->stack[(int)recs->cur]; |
|
} else if (recs->last > 0) { |
|
rec = recs->last; |
|
recs->last = 0; |
|
} else |
|
rec++; |
|
vbuf[1] = htobe64(rec); |
|
|
|
/* |
/* |
* Copy from the in-memory hashtable of pending |
* Skip to the first dash. |
* keyword/mask pairs into the database. |
* Use the remaining line as the description (no more than 70 |
*/ |
* bytes). |
|
*/ |
|
|
seq = R_FIRST; |
if (NULL != (p = strstr(title, "- "))) { |
while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) { |
for (p += 2; ' ' == *p || '\b' == *p; p++) |
seq = R_NEXT; |
/* Skip to next word. */ ; |
assert(sizeof(uint64_t) == val.size); |
} else { |
memcpy(&mask, val.data, val.size); |
if (warnings) |
vbuf[0] = htobe64(mask); |
say(of->file, "No dash in title line"); |
val.size = sizeof(vbuf); |
p = title; |
val.data = &vbuf; |
} |
dbt_put(mdb->db, mdb->dbn, &key, &val); |
|
} |
|
if (ch < 0) { |
|
perror("hash"); |
|
unlink(mdb->dbn); |
|
unlink(mdb->idxn); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
|
|
|
/* |
plen = strlen(p); |
* Apply to the index. If we haven't had a description |
|
* set, put an empty one in now. |
|
*/ |
|
|
|
if (dbuf->len == sv) |
/* Strip backspace-encoding from line. */ |
buf_appendb(dbuf, "", 1); |
|
|
|
key.data = &rec; |
while (NULL != (line = memchr(p, '\b', plen))) { |
key.size = sizeof(recno_t); |
len = line - p; |
|
if (0 == len) { |
val.data = dbuf->cp; |
memmove(line, line + 1, plen--); |
val.size = dbuf->len; |
continue; |
|
} |
if (verb) |
memmove(line - 1, line + 1, plen - len); |
printf("%s: adding to index\n", fn); |
plen -= 2; |
|
|
dbt_put(mdb->idx, mdb->idxn, &key, &val); |
|
} |
} |
|
|
/* |
of->desc = stradd(p); |
* Iterate the remembered file titles and check that |
putkey(of, p, TYPE_Nd); |
* all files can be found by their main title. |
fclose(stream); |
*/ |
free(title); |
|
|
if (warnings) { |
|
seq = R_FIRST; |
|
while (0 == (*files->seq)(files, &key, &val, seq)) { |
|
seq = R_NEXT; |
|
if (val.size) |
|
fprintf(stderr, "%s: probably " |
|
"unreachable, title is %s\n", |
|
(char *)val.data, (char *)key.data); |
|
} |
|
(*files->close)(files); |
|
} |
|
} |
} |
|
|
/* |
/* |
* Scan through all entries in the index file `idx' and prune those |
* Put a type/word pair into the word database for this particular file. |
* entries in `ofile'. |
|
* Pruning consists of removing from `db', then invalidating the entry |
|
* in `idx' (zeroing its value size). |
|
*/ |
*/ |
static void |
static void |
index_prune(const struct of *ofile, struct mdb *mdb, struct recs *recs) |
putkey(const struct of *of, const char *value, uint64_t type) |
{ |
{ |
const struct of *of; |
|
const char *fn; |
|
uint64_t vbuf[2]; |
|
unsigned seq, sseq; |
|
DBT key, val; |
|
int ch; |
|
|
|
recs->cur = 0; |
assert(NULL != value); |
seq = R_FIRST; |
wordaddbuf(of, value, strlen(value), type); |
while (0 == (ch = (*mdb->idx->seq)(mdb->idx, &key, &val, seq))) { |
} |
seq = R_NEXT; |
|
assert(sizeof(recno_t) == key.size); |
|
memcpy(&recs->last, key.data, key.size); |
|
|
|
/* Deleted records are zero-sized. Skip them. */ |
/* |
|
* Like putkey() but for unterminated strings. |
|
*/ |
|
static void |
|
putkeys(const struct of *of, const char *value, int sz, uint64_t type) |
|
{ |
|
|
if (0 == val.size) |
wordaddbuf(of, value, sz, type); |
goto cont; |
} |
|
|
/* |
/* |
* Make sure we're sane. |
* Grok all nodes at or below a certain mdoc node into putkey(). |
* Read past our mdoc/man/cat type to the next string, |
*/ |
* then make sure it's bounded by a NUL. |
static void |
* Failing any of these, we go into our error handler. |
putmdockey(const struct of *of, const struct mdoc_node *n, uint64_t m) |
*/ |
{ |
|
|
fn = (char *)val.data + 1; |
for ( ; NULL != n; n = n->next) { |
if (NULL == memchr(fn, '\0', val.size - 1)) |
if (NULL != n->child) |
break; |
putmdockey(of, n->child, m); |
|
if (MDOC_TEXT == n->type) |
|
putkey(of, n->string, m); |
|
} |
|
} |
|
|
/* |
static void |
* Search for the file in those we care about. |
parse_man(struct of *of, const struct man_node *n) |
* XXX: build this into a tree. Too slow. |
{ |
*/ |
const struct man_node *head, *body; |
|
char *start, *sv, *title; |
|
char byte; |
|
size_t sz, titlesz; |
|
|
for (of = ofile->first; of; of = of->next) |
if (NULL == n) |
if (0 == strcmp(fn, of->fname)) |
return; |
break; |
|
|
|
if (NULL == of) |
/* |
continue; |
* We're only searching for one thing: the first text child in |
|
* the BODY of a NAME section. Since we don't keep track of |
|
* sections in -man, run some hoops to find out whether we're in |
|
* the correct section or not. |
|
*/ |
|
|
/* |
if (MAN_BODY == n->type && MAN_SH == n->tok) { |
* Search through the keyword database, throwing out all |
body = n; |
* references to our file. |
assert(body->parent); |
*/ |
if (NULL != (head = body->parent->head) && |
|
1 == head->nchild && |
|
NULL != (head = (head->child)) && |
|
MAN_TEXT == head->type && |
|
0 == strcmp(head->string, "NAME") && |
|
NULL != (body = body->child) && |
|
MAN_TEXT == body->type) { |
|
|
sseq = R_FIRST; |
title = NULL; |
while (0 == (ch = (*mdb->db->seq)(mdb->db, |
titlesz = 0; |
&key, &val, sseq))) { |
|
sseq = R_NEXT; |
|
if (sizeof(vbuf) != val.size) |
|
break; |
|
|
|
memcpy(vbuf, val.data, val.size); |
/* |
if (recs->last != betoh64(vbuf[1])) |
* Suck the entire NAME section into memory. |
continue; |
* Yes, we might run away. |
|
* But too many manuals have big, spread-out |
|
* NAME sections over many lines. |
|
*/ |
|
|
if ((ch = (*mdb->db->del)(mdb->db, |
for ( ; NULL != body; body = body->next) { |
&key, R_CURSOR)) < 0) |
if (MAN_TEXT != body->type) |
break; |
break; |
} |
if (0 == (sz = strlen(body->string))) |
|
continue; |
|
title = mandoc_realloc |
|
(title, titlesz + sz + 1); |
|
memcpy(title + titlesz, body->string, sz); |
|
titlesz += sz + 1; |
|
title[titlesz - 1] = ' '; |
|
} |
|
if (NULL == title) |
|
return; |
|
|
if (ch < 0) { |
title = mandoc_realloc(title, titlesz + 1); |
perror(mdb->dbn); |
title[titlesz] = '\0'; |
exit((int)MANDOCLEVEL_SYSERR); |
|
} else if (1 != ch) { |
|
fprintf(stderr, "%s: corrupt database\n", |
|
mdb->dbn); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
|
|
|
if (verb) |
/* Skip leading space. */ |
printf("%s: deleting from index\n", fn); |
|
|
|
val.size = 0; |
sv = title; |
ch = (*mdb->idx->put)(mdb->idx, &key, &val, R_CURSOR); |
while (isspace((unsigned char)*sv)) |
|
sv++; |
|
|
if (ch < 0) |
if (0 == (sz = strlen(sv))) { |
break; |
free(title); |
cont: |
return; |
if (recs->cur >= recs->size) { |
} |
recs->size += MANDOC_SLOP; |
|
recs->stack = mandoc_realloc(recs->stack, |
|
recs->size * sizeof(recno_t)); |
|
} |
|
|
|
recs->stack[(int)recs->cur] = recs->last; |
/* Erase trailing space. */ |
recs->cur++; |
|
} |
|
|
|
if (ch < 0) { |
start = &sv[sz - 1]; |
perror(mdb->idxn); |
while (start > sv && isspace((unsigned char)*start)) |
exit((int)MANDOCLEVEL_SYSERR); |
*start-- = '\0'; |
} else if (1 != ch) { |
|
fprintf(stderr, "%s: corrupt index\n", mdb->idxn); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
|
|
|
recs->last++; |
if (start == sv) { |
} |
free(title); |
|
return; |
|
} |
|
|
/* |
start = sv; |
* Grow the buffer (if necessary) and copy in a binary string. |
|
*/ |
|
static void |
|
buf_appendb(struct buf *buf, const void *cp, size_t sz) |
|
{ |
|
|
|
/* Overshoot by MANDOC_BUFSZ. */ |
/* |
|
* Go through a special heuristic dance here. |
|
* Conventionally, one or more manual names are |
|
* comma-specified prior to a whitespace, then a |
|
* dash, then a description. Try to puzzle out |
|
* the name parts here. |
|
*/ |
|
|
while (buf->len + sz >= buf->size) { |
for ( ;; ) { |
buf->size = buf->len + sz + MANDOC_BUFSZ; |
sz = strcspn(start, " ,"); |
buf->cp = mandoc_realloc(buf->cp, buf->size); |
if ('\0' == start[sz]) |
} |
break; |
|
|
memcpy(buf->cp + (int)buf->len, cp, sz); |
byte = start[sz]; |
buf->len += sz; |
start[sz] = '\0'; |
} |
|
|
|
/* |
putkey(of, start, TYPE_Nm); |
* Append a nil-terminated string to the buffer. |
|
* This can be invoked multiple times. |
|
* The buffer string will be nil-terminated. |
|
* If invoked multiple times, a space is put between strings. |
|
*/ |
|
static void |
|
buf_append(struct buf *buf, const char *cp) |
|
{ |
|
size_t sz; |
|
|
|
if (0 == (sz = strlen(cp))) |
if (' ' == byte) { |
return; |
start += sz + 1; |
|
break; |
|
} |
|
|
if (buf->len) |
assert(',' == byte); |
buf->cp[(int)buf->len - 1] = ' '; |
start += sz + 1; |
|
while (' ' == *start) |
|
start++; |
|
} |
|
|
buf_appendb(buf, cp, sz + 1); |
if (sv == start) { |
} |
putkey(of, start, TYPE_Nm); |
|
free(title); |
|
return; |
|
} |
|
|
/* |
while (isspace((unsigned char)*start)) |
* Recursively add all text from a given node. |
start++; |
* This is optimised for general mdoc nodes in this context, which do |
|
* not consist of subexpressions and having a recursive call for n->next |
|
* would be wasteful. |
|
* The "f" variable should be 0 unless called from pmdoc_Nd for the |
|
* description buffer, which does not start at the beginning of the |
|
* buffer. |
|
*/ |
|
static void |
|
buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f) |
|
{ |
|
|
|
for ( ; n; n = n->next) { |
if (0 == strncmp(start, "-", 1)) |
if (n->child) |
start += 1; |
buf_appendmdoc(buf, n->child, f); |
else if (0 == strncmp(start, "\\-\\-", 4)) |
|
start += 4; |
|
else if (0 == strncmp(start, "\\-", 2)) |
|
start += 2; |
|
else if (0 == strncmp(start, "\\(en", 4)) |
|
start += 4; |
|
else if (0 == strncmp(start, "\\(em", 4)) |
|
start += 4; |
|
|
if (MDOC_TEXT == n->type && f) { |
while (' ' == *start) |
f = 0; |
start++; |
buf_appendb(buf, n->string, |
|
strlen(n->string) + 1); |
|
} else if (MDOC_TEXT == n->type) |
|
buf_append(buf, n->string); |
|
|
|
|
assert(NULL == of->desc); |
|
of->desc = stradd(start); |
|
putkey(of, start, TYPE_Nd); |
|
free(title); |
|
return; |
|
} |
} |
} |
|
|
|
for (n = n->child; n; n = n->next) |
|
parse_man(of, n); |
} |
} |
|
|
static void |
static void |
hash_reset(DB **db) |
parse_mdoc(struct of *of, const struct mdoc_node *n) |
{ |
{ |
DB *hash; |
|
|
|
if (NULL != (hash = *db)) |
assert(NULL != n); |
(*hash->close)(hash); |
for (n = n->child; NULL != n; n = n->next) { |
|
switch (n->type) { |
|
case (MDOC_ELEM): |
|
/* FALLTHROUGH */ |
|
case (MDOC_BLOCK): |
|
/* FALLTHROUGH */ |
|
case (MDOC_HEAD): |
|
/* FALLTHROUGH */ |
|
case (MDOC_BODY): |
|
/* FALLTHROUGH */ |
|
case (MDOC_TAIL): |
|
if (NULL != mdocs[n->tok].fp) |
|
if (0 == (*mdocs[n->tok].fp)(of, n)) |
|
break; |
|
|
*db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL); |
if (MDOCF_CHILD & mdocs[n->tok].flags) |
if (NULL == *db) { |
putmdockey(of, n->child, mdocs[n->tok].mask); |
perror("hash"); |
break; |
exit((int)MANDOCLEVEL_SYSERR); |
default: |
|
assert(MDOC_ROOT != n->type); |
|
continue; |
|
} |
|
if (NULL != n->child) |
|
parse_mdoc(of, n); |
} |
} |
} |
} |
|
|
/* ARGSUSED */ |
|
static int |
static int |
pmdoc_head(MDOC_ARGS) |
parse_mdoc_Fd(struct of *of, const struct mdoc_node *n) |
{ |
{ |
|
|
return(MDOC_HEAD == n->type); |
|
} |
|
|
|
/* ARGSUSED */ |
|
static int |
|
pmdoc_body(MDOC_ARGS) |
|
{ |
|
|
|
return(MDOC_BODY == n->type); |
|
} |
|
|
|
/* ARGSUSED */ |
|
static int |
|
pmdoc_Fd(MDOC_ARGS) |
|
{ |
|
const char *start, *end; |
const char *start, *end; |
size_t sz; |
size_t sz; |
|
|
if (SEC_SYNOPSIS != n->sec) |
if (SEC_SYNOPSIS != n->sec || |
|
NULL == (n = n->child) || |
|
MDOC_TEXT != n->type) |
return(0); |
return(0); |
if (NULL == (n = n->child) || MDOC_TEXT != n->type) |
|
return(0); |
|
|
|
/* |
/* |
* Only consider those `Fd' macro fields that begin with an |
* Only consider those `Fd' macro fields that begin with an |
* "inclusion" token (versus, e.g., #define). |
* "inclusion" token (versus, e.g., #define). |
*/ |
*/ |
|
|
if (strcmp("#include", n->string)) |
if (strcmp("#include", n->string)) |
return(0); |
return(0); |
|
|
Line 1131 pmdoc_Fd(MDOC_ARGS) |
|
Line 1396 pmdoc_Fd(MDOC_ARGS) |
|
if ('>' == *end || '"' == *end) |
if ('>' == *end || '"' == *end) |
end--; |
end--; |
|
|
assert(end >= start); |
if (end > start) |
|
putkeys(of, start, end - start + 1, TYPE_In); |
buf_appendb(buf, start, (size_t)(end - start + 1)); |
|
buf_appendb(buf, "", 1); |
|
return(1); |
return(1); |
} |
} |
|
|
/* ARGSUSED */ |
|
static int |
static int |
pmdoc_In(MDOC_ARGS) |
parse_mdoc_In(struct of *of, const struct mdoc_node *n) |
{ |
{ |
|
|
if (NULL == n->child || MDOC_TEXT != n->child->type) |
if (NULL != n->child && MDOC_TEXT == n->child->type) |
return(0); |
return(0); |
|
|
buf_append(buf, n->child->string); |
putkey(of, n->child->string, TYPE_In); |
return(1); |
return(1); |
} |
} |
|
|
/* ARGSUSED */ |
|
static int |
static int |
pmdoc_Fn(MDOC_ARGS) |
parse_mdoc_Fn(struct of *of, const struct mdoc_node *n) |
{ |
{ |
struct mdoc_node *nn; |
|
const char *cp; |
const char *cp; |
|
|
nn = n->child; |
if (NULL == (n = n->child) || MDOC_TEXT != n->type) |
|
|
if (NULL == nn || MDOC_TEXT != nn->type) |
|
return(0); |
return(0); |
|
|
/* .Fn "struct type *name" "char *arg" */ |
/* |
|
* Parse: .Fn "struct type *name" "char *arg". |
|
* First strip away pointer symbol. |
|
* Then store the function name, then type. |
|
* Finally, store the arguments. |
|
*/ |
|
|
cp = strrchr(nn->string, ' '); |
if (NULL == (cp = strrchr(n->string, ' '))) |
if (NULL == cp) |
cp = n->string; |
cp = nn->string; |
|
|
|
/* Strip away pointer symbol. */ |
|
|
|
while ('*' == *cp) |
while ('*' == *cp) |
cp++; |
cp++; |
|
|
/* Store the function name. */ |
putkey(of, cp, TYPE_Fn); |
|
|
buf_append(buf, cp); |
if (n->string < cp) |
hash_put(hash, buf, TYPE_Fn); |
putkeys(of, n->string, cp - n->string, TYPE_Ft); |
|
|
/* Store the function type. */ |
for (n = n->next; NULL != n; n = n->next) |
|
if (MDOC_TEXT == n->type) |
|
putkey(of, n->string, TYPE_Fa); |
|
|
if (nn->string < cp) { |
|
buf->len = 0; |
|
buf_appendb(buf, nn->string, cp - nn->string); |
|
buf_appendb(buf, "", 1); |
|
hash_put(hash, buf, TYPE_Ft); |
|
} |
|
|
|
/* Store the arguments. */ |
|
|
|
for (nn = nn->next; nn; nn = nn->next) { |
|
if (MDOC_TEXT != nn->type) |
|
continue; |
|
buf->len = 0; |
|
buf_append(buf, nn->string); |
|
hash_put(hash, buf, TYPE_Fa); |
|
} |
|
|
|
return(0); |
return(0); |
} |
} |
|
|
/* ARGSUSED */ |
|
static int |
static int |
pmdoc_St(MDOC_ARGS) |
parse_mdoc_St(struct of *of, const struct mdoc_node *n) |
{ |
{ |
|
|
if (NULL == n->child || MDOC_TEXT != n->child->type) |
if (NULL == n->child || MDOC_TEXT != n->child->type) |
return(0); |
return(0); |
|
|
buf_append(buf, n->child->string); |
putkey(of, n->child->string, TYPE_St); |
return(1); |
return(1); |
} |
} |
|
|
/* ARGSUSED */ |
|
static int |
static int |
pmdoc_Xr(MDOC_ARGS) |
parse_mdoc_Xr(struct of *of, const struct mdoc_node *n) |
{ |
{ |
|
|
if (NULL == (n = n->child)) |
if (NULL == (n = n->child)) |
return(0); |
return(0); |
|
|
buf_appendb(buf, n->string, strlen(n->string)); |
putkey(of, n->string, TYPE_Xr); |
|
|
if (NULL != (n = n->next)) { |
|
buf_appendb(buf, ".", 1); |
|
buf_appendb(buf, n->string, strlen(n->string) + 1); |
|
} else |
|
buf_appendb(buf, ".", 2); |
|
|
|
return(1); |
return(1); |
} |
} |
|
|
/* ARGSUSED */ |
|
static int |
static int |
pmdoc_Nd(MDOC_ARGS) |
parse_mdoc_Nd(struct of *of, const struct mdoc_node *n) |
{ |
{ |
|
size_t sz; |
|
char *sv, *desc; |
|
|
if (MDOC_BODY != n->type) |
if (MDOC_BODY != n->type) |
return(0); |
return(0); |
|
|
buf_appendmdoc(dbuf, n->child, 1); |
/* |
|
* Special-case the `Nd' because we need to put the description |
|
* into the document table. |
|
*/ |
|
|
|
desc = NULL; |
|
for (n = n->child; NULL != n; n = n->next) { |
|
if (MDOC_TEXT == n->type) { |
|
sz = strlen(n->string) + 1; |
|
if (NULL != (sv = desc)) |
|
sz += strlen(desc) + 1; |
|
desc = mandoc_realloc(desc, sz); |
|
if (NULL != sv) |
|
strlcat(desc, " ", sz); |
|
else |
|
*desc = '\0'; |
|
strlcat(desc, n->string, sz); |
|
} |
|
if (NULL != n->child) |
|
parse_mdoc_Nd(of, n); |
|
} |
|
|
|
of->desc = NULL != desc ? stradd(desc) : NULL; |
|
free(desc); |
return(1); |
return(1); |
} |
} |
|
|
/* ARGSUSED */ |
|
static int |
static int |
pmdoc_Nm(MDOC_ARGS) |
parse_mdoc_Nm(struct of *of, const struct mdoc_node *n) |
{ |
{ |
|
|
if (SEC_NAME == n->sec) |
if (SEC_NAME == n->sec) |
Line 1253 pmdoc_Nm(MDOC_ARGS) |
|
Line 1512 pmdoc_Nm(MDOC_ARGS) |
|
else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) |
else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) |
return(0); |
return(0); |
|
|
if (NULL == n->child) |
|
buf_append(buf, m->name); |
|
|
|
return(1); |
return(1); |
} |
} |
|
|
/* ARGSUSED */ |
|
static int |
static int |
pmdoc_Sh(MDOC_ARGS) |
parse_mdoc_Sh(struct of *of, const struct mdoc_node *n) |
{ |
{ |
|
|
return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type); |
return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type); |
} |
} |
|
|
static void |
static int |
hash_put(DB *db, const struct buf *buf, uint64_t mask) |
parse_mdoc_head(struct of *of, const struct mdoc_node *n) |
{ |
{ |
uint64_t oldmask; |
|
DBT key, val; |
|
int rc; |
|
|
|
if (buf->len < 2) |
return(MDOC_HEAD == n->type); |
return; |
} |
|
|
key.data = buf->cp; |
static int |
key.size = buf->len; |
parse_mdoc_body(struct of *of, const struct mdoc_node *n) |
|
{ |
|
|
if ((rc = (*db->get)(db, &key, &val, 0)) < 0) { |
return(MDOC_BODY == n->type); |
perror("hash"); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} else if (0 == rc) { |
|
assert(sizeof(uint64_t) == val.size); |
|
memcpy(&oldmask, val.data, val.size); |
|
mask |= oldmask; |
|
} |
|
|
|
val.data = &mask; |
|
val.size = sizeof(uint64_t); |
|
|
|
if ((rc = (*db->put)(db, &key, &val, 0)) < 0) { |
|
perror("hash"); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
|
} |
} |
|
|
static void |
/* |
dbt_put(DB *db, const char *dbn, DBT *key, DBT *val) |
* See straddbuf(). |
|
*/ |
|
static char * |
|
stradd(const char *cp) |
{ |
{ |
|
|
assert(key->size); |
return(straddbuf(cp, strlen(cp))); |
assert(val->size); |
|
|
|
if (0 == (*db->put)(db, key, val, 0)) |
|
return; |
|
|
|
perror(dbn); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
/* NOTREACHED */ |
|
} |
} |
|
|
/* |
/* |
* Call out to per-macro handlers after clearing the persistent database |
* This looks up or adds a string to the string table. |
* key. If the macro sets the database key, flush it to the database. |
* The string table is a table of all strings encountered during parse |
|
* or file scan. |
|
* In using it, we avoid having thousands of (e.g.) "cat1" string |
|
* allocations for the "of" table. |
|
* We also have a layer atop the string table for keeping track of words |
|
* in a parse sequence (see wordaddbuf()). |
*/ |
*/ |
static void |
static char * |
pmdoc_node(MDOC_ARGS) |
straddbuf(const char *cp, size_t sz) |
{ |
{ |
|
struct str *s; |
|
unsigned int index; |
|
const char *end; |
|
|
if (NULL == n) |
if (NULL != (s = hashget(cp, sz))) |
return; |
return(s->key); |
|
|
switch (n->type) { |
s = mandoc_calloc(sizeof(struct str) + sz + 1, 1); |
case (MDOC_HEAD): |
memcpy(s->key, cp, sz); |
/* FALLTHROUGH */ |
|
case (MDOC_BODY): |
|
/* FALLTHROUGH */ |
|
case (MDOC_TAIL): |
|
/* FALLTHROUGH */ |
|
case (MDOC_BLOCK): |
|
/* FALLTHROUGH */ |
|
case (MDOC_ELEM): |
|
buf->len = 0; |
|
|
|
/* |
end = cp + sz; |
* Both NULL handlers and handlers returning true |
index = ohash_qlookupi(&strings, cp, &end); |
* request using the data. Only skip the element |
assert(NULL == ohash_find(&strings, index)); |
* when the handler returns false. |
ohash_insert(&strings, index, s); |
*/ |
return(s->key); |
|
} |
|
|
if (NULL != mdocs[n->tok].fp && |
static struct str * |
0 == (*mdocs[n->tok].fp)(hash, buf, dbuf, n, m)) |
hashget(const char *cp, size_t sz) |
break; |
{ |
|
unsigned int index; |
|
const char *end; |
|
|
/* |
end = cp + sz; |
* For many macros, use the text from all children. |
index = ohash_qlookupi(&strings, cp, &end); |
* Set zero flags for macros not needing this. |
return(ohash_find(&strings, index)); |
* In that case, the handler must fill the buffer. |
} |
*/ |
|
|
|
if (MDOCF_CHILD & mdocs[n->tok].flags) |
/* |
buf_appendmdoc(buf, n->child, 0); |
* Add a word to the current parse sequence. |
|
* Within the hashtable of strings, we maintain a list of strings that |
|
* are currently indexed. |
|
* Each of these ("words") has a bitmask modified within the parse. |
|
* When we finish a parse, we'll dump the list, then remove the head |
|
* entry -- since the next parse will have a new "of", it can keep track |
|
* of its entries without conflict. |
|
*/ |
|
static void |
|
wordaddbuf(const struct of *of, |
|
const char *cp, size_t sz, uint64_t v) |
|
{ |
|
struct str *s; |
|
unsigned int index; |
|
const char *end; |
|
|
/* |
if (0 == sz) |
* Cover the most common case: |
return; |
* Automatically stage one string per element. |
|
* Set a zero mask for macros not needing this. |
|
* Additional staging can be done in the handler. |
|
*/ |
|
|
|
if (mdocs[n->tok].mask) |
s = hashget(cp, sz); |
hash_put(hash, buf, mdocs[n->tok].mask); |
|
break; |
if (NULL != s && of == s->of) { |
default: |
s->mask |= v; |
break; |
return; |
|
} else if (NULL == s) { |
|
s = mandoc_calloc(sizeof(struct str) + sz + 1, 1); |
|
memcpy(s->key, cp, sz); |
|
end = cp + sz; |
|
index = ohash_qlookupi(&strings, cp, &end); |
|
assert(NULL == ohash_find(&strings, index)); |
|
ohash_insert(&strings, index, s); |
} |
} |
|
|
pmdoc_node(hash, buf, dbuf, n->child, m); |
s->next = words; |
pmdoc_node(hash, buf, dbuf, n->next, m); |
s->of = of; |
|
s->mask = v; |
|
words = s; |
} |
} |
|
|
static int |
/* |
pman_node(MAN_ARGS) |
* Take a Unicode codepoint and produce its UTF-8 encoding. |
|
* This isn't the best way to do this, but it works. |
|
* The magic numbers are from the UTF-8 packaging. |
|
* They're not as scary as they seem: read the UTF-8 spec for details. |
|
*/ |
|
static size_t |
|
utf8(unsigned int cp, char out[7]) |
{ |
{ |
const struct man_node *head, *body; |
size_t rc; |
char *start, *sv, *title; |
|
size_t sz, titlesz; |
|
|
|
if (NULL == n) |
rc = 0; |
|
if (cp <= 0x0000007F) { |
|
rc = 1; |
|
out[0] = (char)cp; |
|
} else if (cp <= 0x000007FF) { |
|
rc = 2; |
|
out[0] = (cp >> 6 & 31) | 192; |
|
out[1] = (cp & 63) | 128; |
|
} else if (cp <= 0x0000FFFF) { |
|
rc = 3; |
|
out[0] = (cp >> 12 & 15) | 224; |
|
out[1] = (cp >> 6 & 63) | 128; |
|
out[2] = (cp & 63) | 128; |
|
} else if (cp <= 0x001FFFFF) { |
|
rc = 4; |
|
out[0] = (cp >> 18 & 7) | 240; |
|
out[1] = (cp >> 12 & 63) | 128; |
|
out[2] = (cp >> 6 & 63) | 128; |
|
out[3] = (cp & 63) | 128; |
|
} else if (cp <= 0x03FFFFFF) { |
|
rc = 5; |
|
out[0] = (cp >> 24 & 3) | 248; |
|
out[1] = (cp >> 18 & 63) | 128; |
|
out[2] = (cp >> 12 & 63) | 128; |
|
out[3] = (cp >> 6 & 63) | 128; |
|
out[4] = (cp & 63) | 128; |
|
} else if (cp <= 0x7FFFFFFF) { |
|
rc = 6; |
|
out[0] = (cp >> 30 & 1) | 252; |
|
out[1] = (cp >> 24 & 63) | 128; |
|
out[2] = (cp >> 18 & 63) | 128; |
|
out[3] = (cp >> 12 & 63) | 128; |
|
out[4] = (cp >> 6 & 63) | 128; |
|
out[5] = (cp & 63) | 128; |
|
} else |
return(0); |
return(0); |
|
|
/* |
out[rc] = '\0'; |
* We're only searching for one thing: the first text child in |
return(rc); |
* the BODY of a NAME section. Since we don't keep track of |
} |
* sections in -man, run some hoops to find out whether we're in |
|
* the correct section or not. |
|
*/ |
|
|
|
if (MAN_BODY == n->type && MAN_SH == n->tok) { |
/* |
body = n; |
* Store the UTF-8 version of a key, or alias the pointer if the key has |
assert(body->parent); |
* no UTF-8 transcription marks in it. |
if (NULL != (head = body->parent->head) && |
*/ |
1 == head->nchild && |
static void |
NULL != (head = (head->child)) && |
utf8key(struct mchars *mc, struct str *key) |
MAN_TEXT == head->type && |
{ |
0 == strcmp(head->string, "NAME") && |
size_t sz, bsz, pos; |
NULL != (body = body->child) && |
char utfbuf[7], res[5]; |
MAN_TEXT == body->type) { |
char *buf; |
|
const char *seq, *cpp, *val; |
|
int len, u; |
|
enum mandoc_esc esc; |
|
|
title = NULL; |
assert(NULL == key->utf8); |
titlesz = 0; |
|
/* |
|
* Suck the entire NAME section into memory. |
|
* Yes, we might run away. |
|
* But too many manuals have big, spread-out |
|
* NAME sections over many lines. |
|
*/ |
|
for ( ; NULL != body; body = body->next) { |
|
if (MAN_TEXT != body->type) |
|
break; |
|
if (0 == (sz = strlen(body->string))) |
|
continue; |
|
title = mandoc_realloc |
|
(title, titlesz + sz + 1); |
|
memcpy(title + titlesz, body->string, sz); |
|
titlesz += sz + 1; |
|
title[(int)titlesz - 1] = ' '; |
|
} |
|
if (NULL == title) |
|
return(0); |
|
|
|
title = mandoc_realloc(title, titlesz + 1); |
res[0] = '\\'; |
title[(int)titlesz] = '\0'; |
res[1] = '\t'; |
|
res[2] = ASCII_NBRSP; |
|
res[3] = ASCII_HYPH; |
|
res[4] = '\0'; |
|
|
/* Skip leading space. */ |
val = key->key; |
|
bsz = strlen(val); |
|
|
sv = title; |
/* |
while (isspace((unsigned char)*sv)) |
* Pre-check: if we have no stop-characters, then set the |
sv++; |
* pointer as ourselvse and get out of here. |
|
*/ |
|
if (strcspn(val, res) == bsz) { |
|
key->utf8 = key->key; |
|
return; |
|
} |
|
|
if (0 == (sz = strlen(sv))) { |
/* Pre-allocate by the length of the input */ |
free(title); |
|
return(0); |
|
} |
|
|
|
/* Erase trailing space. */ |
buf = mandoc_malloc(++bsz); |
|
pos = 0; |
|
|
start = &sv[sz - 1]; |
while ('\0' != *val) { |
while (start > sv && isspace((unsigned char)*start)) |
/* |
*start-- = '\0'; |
* Halt on the first escape sequence. |
|
* This also halts on the end of string, in which case |
|
* we just copy, fallthrough, and exit the loop. |
|
*/ |
|
if ((sz = strcspn(val, res)) > 0) { |
|
memcpy(&buf[pos], val, sz); |
|
pos += sz; |
|
val += sz; |
|
} |
|
|
if (start == sv) { |
if (ASCII_HYPH == *val) { |
free(title); |
buf[pos++] = '-'; |
return(0); |
val++; |
} |
continue; |
|
} else if ('\t' == *val || ASCII_NBRSP == *val) { |
|
buf[pos++] = ' '; |
|
val++; |
|
continue; |
|
} else if ('\\' != *val) |
|
break; |
|
|
start = sv; |
/* Read past the slash. */ |
|
|
/* |
val++; |
* Go through a special heuristic dance here. |
u = 0; |
* This is why -man manuals are great! |
|
* (I'm being sarcastic: my eyes are bleeding.) |
|
* Conventionally, one or more manual names are |
|
* comma-specified prior to a whitespace, then a |
|
* dash, then a description. Try to puzzle out |
|
* the name parts here. |
|
*/ |
|
|
|
for ( ;; ) { |
/* |
sz = strcspn(start, " ,"); |
* Parse the escape sequence and see if it's a |
if ('\0' == start[(int)sz]) |
* predefined character or special character. |
break; |
*/ |
|
esc = mandoc_escape |
|
((const char **)&val, &seq, &len); |
|
if (ESCAPE_ERROR == esc) |
|
break; |
|
|
buf->len = 0; |
if (ESCAPE_SPECIAL != esc) |
buf_appendb(buf, start, sz); |
continue; |
buf_appendb(buf, "", 1); |
if (0 == (u = mchars_spec2cp(mc, seq, len))) |
|
continue; |
|
|
hash_put(hash, buf, TYPE_Nm); |
/* |
|
* If we have a Unicode codepoint, try to convert that |
|
* to a UTF-8 byte string. |
|
*/ |
|
cpp = utfbuf; |
|
if (0 == (sz = utf8(u, utfbuf))) |
|
continue; |
|
|
if (' ' == start[(int)sz]) { |
/* Copy the rendered glyph into the stream. */ |
start += (int)sz + 1; |
|
break; |
|
} |
|
|
|
assert(',' == start[(int)sz]); |
sz = strlen(cpp); |
start += (int)sz + 1; |
bsz += sz; |
while (' ' == *start) |
|
start++; |
|
} |
|
|
|
buf->len = 0; |
buf = mandoc_realloc(buf, bsz); |
|
|
if (sv == start) { |
memcpy(&buf[pos], cpp, sz); |
buf_append(buf, start); |
pos += sz; |
free(title); |
|
return(1); |
|
} |
|
|
|
while (isspace((unsigned char)*start)) |
|
start++; |
|
|
|
if (0 == strncmp(start, "-", 1)) |
|
start += 1; |
|
else if (0 == strncmp(start, "\\-\\-", 4)) |
|
start += 4; |
|
else if (0 == strncmp(start, "\\-", 2)) |
|
start += 2; |
|
else if (0 == strncmp(start, "\\(en", 4)) |
|
start += 4; |
|
else if (0 == strncmp(start, "\\(em", 4)) |
|
start += 4; |
|
|
|
while (' ' == *start) |
|
start++; |
|
|
|
sz = strlen(start) + 1; |
|
buf_appendb(dbuf, start, sz); |
|
buf_appendb(buf, start, sz); |
|
|
|
hash_put(hash, buf, TYPE_Nd); |
|
free(title); |
|
} |
|
} |
} |
|
|
for (n = n->child; n; n = n->next) |
buf[pos] = '\0'; |
if (pman_node(hash, buf, dbuf, n)) |
key->utf8 = buf; |
return(1); |
|
|
|
return(0); |
|
} |
} |
|
|
/* |
/* |
* Parse a formatted manual page. |
* Flush the current page's terms (and their bits) into the database. |
* By necessity, this involves rather crude guesswork. |
* Wrap the entire set of additions in a transaction to make sqlite be a |
|
* little faster. |
|
* Also, UTF-8-encode the description at the last possible moment. |
*/ |
*/ |
static void |
static void |
pformatted(DB *hash, struct buf *buf, |
dbindex(struct mchars *mc, int form, const struct of *of) |
struct buf *dbuf, const struct of *of) |
|
{ |
{ |
FILE *stream; |
struct str *key; |
char *line, *p, *title; |
const char *desc; |
size_t len, plen, titlesz; |
int64_t recno; |
|
size_t i; |
|
|
if (NULL == (stream = fopen(of->fname, "r"))) { |
if (verb) |
if (warnings) |
say(of->file, "Adding to index"); |
perror(of->fname); |
|
|
if (nodb) |
return; |
return; |
|
|
|
desc = ""; |
|
if (NULL != of->desc) { |
|
key = hashget(of->desc, strlen(of->desc)); |
|
assert(NULL != key); |
|
if (NULL == key->utf8) |
|
utf8key(mc, key); |
|
desc = key->utf8; |
} |
} |
|
|
/* |
SQL_EXEC("BEGIN TRANSACTION"); |
* Always use the title derived from the filename up front, |
|
* do not even try to find it in the file. This also makes |
|
* sure we don't end up with an orphan index record, even if |
|
* the file content turns out to be completely unintelligible. |
|
*/ |
|
|
|
buf->len = 0; |
i = 1; |
buf_append(buf, of->title); |
SQL_BIND_TEXT(stmts[STMT_INSERT_DOC], i, of->file); |
hash_put(hash, buf, TYPE_Nm); |
SQL_BIND_TEXT(stmts[STMT_INSERT_DOC], i, of->sec); |
|
SQL_BIND_TEXT(stmts[STMT_INSERT_DOC], i, of->arch); |
|
SQL_BIND_TEXT(stmts[STMT_INSERT_DOC], i, desc); |
|
SQL_BIND_INT(stmts[STMT_INSERT_DOC], i, form); |
|
SQL_STEP(stmts[STMT_INSERT_DOC]); |
|
recno = sqlite3_last_insert_rowid(db); |
|
sqlite3_reset(stmts[STMT_INSERT_DOC]); |
|
|
/* Skip to first blank line. */ |
for (key = words; NULL != key; key = key->next) { |
|
assert(key->of == of); |
while (NULL != (line = fgetln(stream, &len))) |
if (NULL == key->utf8) |
if ('\n' == *line) |
utf8key(mc, key); |
break; |
i = 1; |
|
SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, key->mask); |
/* |
SQL_BIND_TEXT(stmts[STMT_INSERT_KEY], i, key->utf8); |
* Assume the first line that is not indented |
SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, recno); |
* is the first section header. Skip to it. |
SQL_STEP(stmts[STMT_INSERT_KEY]); |
*/ |
sqlite3_reset(stmts[STMT_INSERT_KEY]); |
|
|
while (NULL != (line = fgetln(stream, &len))) |
|
if ('\n' != *line && ' ' != *line) |
|
break; |
|
|
|
/* |
|
* Read up until the next section into a buffer. |
|
* Strip the leading and trailing newline from each read line, |
|
* appending a trailing space. |
|
* Ignore empty (whitespace-only) lines. |
|
*/ |
|
|
|
titlesz = 0; |
|
title = NULL; |
|
|
|
while (NULL != (line = fgetln(stream, &len))) { |
|
if (' ' != *line || '\n' != line[(int)len - 1]) |
|
break; |
|
while (len > 0 && isspace((unsigned char)*line)) { |
|
line++; |
|
len--; |
|
} |
|
if (1 == len) |
|
continue; |
|
title = mandoc_realloc(title, titlesz + len); |
|
memcpy(title + titlesz, line, len); |
|
titlesz += len; |
|
title[(int)titlesz - 1] = ' '; |
|
} |
} |
|
|
|
SQL_EXEC("END TRANSACTION"); |
|
} |
|
|
/* |
static void |
* If no page content can be found, or the input line |
dbprune(void) |
* is already the next section header, or there is no |
{ |
* trailing newline, reuse the page title as the page |
struct of *of; |
* description. |
size_t i; |
*/ |
|
|
|
if (NULL == title || '\0' == *title) { |
if (nodb) |
if (warnings) |
|
fprintf(stderr, "%s: cannot find NAME section\n", |
|
of->fname); |
|
buf_appendb(dbuf, buf->cp, buf->size); |
|
hash_put(hash, buf, TYPE_Nd); |
|
fclose(stream); |
|
free(title); |
|
return; |
return; |
} |
|
|
|
title = mandoc_realloc(title, titlesz + 1); |
for (of = ofs; NULL != of; of = of->next) { |
title[(int)titlesz] = '\0'; |
i = 1; |
|
SQL_BIND_TEXT(stmts[STMT_DELETE], i, of->file); |
/* |
SQL_STEP(stmts[STMT_DELETE]); |
* Skip to the first dash. |
sqlite3_reset(stmts[STMT_DELETE]); |
* Use the remaining line as the description (no more than 70 |
if (verb) |
* bytes). |
say(of->file, "Deleted from index"); |
*/ |
|
|
|
if (NULL != (p = strstr(title, "- "))) { |
|
for (p += 2; ' ' == *p || '\b' == *p; p++) |
|
/* Skip to next word. */ ; |
|
} else { |
|
if (warnings) |
|
fprintf(stderr, "%s: no dash in title line\n", |
|
of->fname); |
|
p = title; |
|
} |
} |
|
|
plen = strlen(p); |
|
|
|
/* Strip backspace-encoding from line. */ |
|
|
|
while (NULL != (line = memchr(p, '\b', plen))) { |
|
len = line - p; |
|
if (0 == len) { |
|
memmove(line, line + 1, plen--); |
|
continue; |
|
} |
|
memmove(line - 1, line + 1, plen - len); |
|
plen -= 2; |
|
} |
|
|
|
buf_appendb(dbuf, p, plen + 1); |
|
buf->len = 0; |
|
buf_appendb(buf, p, plen + 1); |
|
hash_put(hash, buf, TYPE_Nd); |
|
fclose(stream); |
|
free(title); |
|
} |
} |
|
|
|
/* |
|
* Close an existing database and its prepared statements. |
|
* If "real" is not set, rename the temporary file into the real one. |
|
*/ |
static void |
static void |
ofile_argbuild(int argc, char *argv[], struct of **of, |
dbclose(int real) |
const char *basedir) |
|
{ |
{ |
char buf[PATH_MAX]; |
size_t i; |
char pbuf[PATH_MAX]; |
|
const char *sec, *arch, *title; |
|
char *relpath, *p; |
|
int i, src_form; |
|
struct of *nof; |
|
|
|
for (i = 0; i < argc; i++) { |
if (nodb) |
if (NULL == (relpath = realpath(argv[i], pbuf))) { |
return; |
perror(argv[i]); |
|
continue; |
|
} |
|
if (NULL != basedir) { |
|
if (strstr(pbuf, basedir) != pbuf) { |
|
fprintf(stderr, "%s: file outside " |
|
"base directory %s\n", |
|
pbuf, basedir); |
|
continue; |
|
} |
|
relpath = pbuf + strlen(basedir); |
|
} |
|
|
|
/* |
for (i = 0; i < STMT__MAX; i++) { |
* Try to infer the manual section, architecture and |
sqlite3_finalize(stmts[i]); |
* page title from the path, assuming it looks like |
stmts[i] = NULL; |
* man*[/<arch>]/<title>.<section> or |
} |
* cat<section>[/<arch>]/<title>.0 |
|
*/ |
|
|
|
if (strlcpy(buf, relpath, sizeof(buf)) >= sizeof(buf)) { |
sqlite3_close(db); |
fprintf(stderr, "%s: path too long\n", relpath); |
db = NULL; |
continue; |
|
} |
|
sec = arch = title = ""; |
|
src_form = 0; |
|
p = strrchr(buf, '\0'); |
|
while (p-- > buf) { |
|
if ('\0' == *sec && '.' == *p) { |
|
sec = p + 1; |
|
*p = '\0'; |
|
if ('0' == *sec) |
|
src_form |= MANDOC_FORM; |
|
else if ('1' <= *sec && '9' >= *sec) |
|
src_form |= MANDOC_SRC; |
|
continue; |
|
} |
|
if ('/' != *p) |
|
continue; |
|
if ('\0' == *title) { |
|
title = p + 1; |
|
*p = '\0'; |
|
continue; |
|
} |
|
if (0 == strncmp("man", p + 1, 3)) |
|
src_form |= MANDOC_SRC; |
|
else if (0 == strncmp("cat", p + 1, 3)) |
|
src_form |= MANDOC_FORM; |
|
else |
|
arch = p + 1; |
|
break; |
|
} |
|
if ('\0' == *title) { |
|
if (warnings) |
|
fprintf(stderr, |
|
"%s: cannot deduce title " |
|
"from filename\n", |
|
relpath); |
|
title = buf; |
|
} |
|
|
|
/* |
if (real) |
* Build the file structure. |
return; |
*/ |
|
|
|
nof = mandoc_calloc(1, sizeof(struct of)); |
if (-1 == rename(MANDOC_DB "~", MANDOC_DB)) { |
nof->fname = mandoc_strdup(relpath); |
exitcode = (int)MANDOCLEVEL_SYSERR; |
nof->sec = mandoc_strdup(sec); |
say(MANDOC_DB, NULL); |
nof->arch = mandoc_strdup(arch); |
|
nof->title = mandoc_strdup(title); |
|
nof->src_form = src_form; |
|
|
|
/* |
|
* Add the structure to the list. |
|
*/ |
|
|
|
if (NULL == *of) { |
|
*of = nof; |
|
(*of)->first = nof; |
|
} else { |
|
nof->first = (*of)->first; |
|
(*of)->next = nof; |
|
*of = nof; |
|
} |
|
} |
} |
} |
} |
|
|
/* |
/* |
* Recursively build up a list of files to parse. |
* This is straightforward stuff. |
* We use this instead of ftw() and so on because I don't want global |
* Open a database connection to a "temporary" database, then open a set |
* variables hanging around. |
* of prepared statements we'll use over and over again. |
* This ignores the mandoc.db and mandoc.index files, but assumes that |
* If "real" is set, we use the existing database; if not, we truncate a |
* everything else is a manual. |
* temporary one. |
* Pass in a pointer to a NULL structure for the first invocation. |
* Must be matched by dbclose(). |
*/ |
*/ |
static void |
static int |
ofile_dirbuild(const char *dir, const char* psec, const char *parch, |
dbopen(int real) |
int p_src_form, struct of **of) |
|
{ |
{ |
char buf[PATH_MAX]; |
const char *file, *sql; |
#if defined(__sun) |
int rc, ofl; |
struct stat sb; |
|
#endif |
|
size_t sz; |
|
DIR *d; |
|
const char *fn, *sec, *arch; |
|
char *p, *q, *suffix; |
|
struct of *nof; |
|
struct dirent *dp; |
|
int src_form; |
|
|
|
if (NULL == (d = opendir(dir))) { |
if (nodb) |
if (warnings) |
return(1); |
perror(dir); |
|
return; |
ofl = SQLITE_OPEN_READWRITE; |
|
if (0 == real) { |
|
file = MANDOC_DB "~"; |
|
if (-1 == remove(file) && ENOENT != errno) { |
|
exitcode = (int)MANDOCLEVEL_SYSERR; |
|
say(file, NULL); |
|
return(0); |
|
} |
|
ofl |= SQLITE_OPEN_EXCLUSIVE; |
|
} else |
|
file = MANDOC_DB; |
|
|
|
rc = sqlite3_open_v2(file, &db, ofl, NULL); |
|
if (SQLITE_OK == rc) |
|
goto prepare_statements; |
|
if (SQLITE_CANTOPEN != rc) { |
|
exitcode = (int)MANDOCLEVEL_SYSERR; |
|
say(file, NULL); |
|
return(0); |
} |
} |
|
|
while (NULL != (dp = readdir(d))) { |
sqlite3_close(db); |
fn = dp->d_name; |
db = NULL; |
|
|
if ('.' == *fn) |
if (SQLITE_OK != (rc = sqlite3_open(file, &db))) { |
continue; |
exitcode = (int)MANDOCLEVEL_SYSERR; |
|
say(file, NULL); |
|
return(0); |
|
} |
|
|
src_form = p_src_form; |
sql = "CREATE TABLE \"docs\" (\n" |
|
" \"file\" TEXT NOT NULL,\n" |
|
" \"sec\" TEXT NOT NULL,\n" |
|
" \"arch\" TEXT NOT NULL,\n" |
|
" \"desc\" TEXT NOT NULL,\n" |
|
" \"form\" INTEGER NOT NULL,\n" |
|
" \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n" |
|
");\n" |
|
"\n" |
|
"CREATE TABLE \"keys\" (\n" |
|
" \"bits\" INTEGER NOT NULL,\n" |
|
" \"key\" TEXT NOT NULL,\n" |
|
" \"docid\" INTEGER NOT NULL REFERENCES docs(id) " |
|
"ON DELETE CASCADE,\n" |
|
" \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n" |
|
");\n" |
|
"\n" |
|
"CREATE INDEX \"key_index\" ON keys (key);\n"; |
|
|
#if defined(__sun) |
if (SQLITE_OK != sqlite3_exec(db, sql, NULL, NULL, NULL)) { |
stat(dp->d_name, &sb); |
exitcode = (int)MANDOCLEVEL_SYSERR; |
if (S_IFDIR & sb.st_mode) { |
say(file, "%s", sqlite3_errmsg(db)); |
#else |
return(0); |
if (DT_DIR == dp->d_type) { |
} |
#endif |
|
sec = psec; |
|
arch = parch; |
|
|
|
/* |
prepare_statements: |
* By default, only use directories called: |
SQL_EXEC("PRAGMA foreign_keys = ON"); |
* man<section>/[<arch>/] or |
sql = "DELETE FROM docs where file=?"; |
* cat<section>/[<arch>/] |
sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_DELETE], NULL); |
*/ |
sql = "INSERT INTO docs " |
|
"(file,sec,arch,desc,form) VALUES (?,?,?,?,?)"; |
|
sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_DOC], NULL); |
|
sql = "INSERT INTO keys " |
|
"(bits,key,docid) VALUES (?,?,?)"; |
|
sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_KEY], NULL); |
|
return(1); |
|
} |
|
|
if ('\0' == *sec) { |
static void * |
if(0 == strncmp("man", fn, 3)) { |
hash_halloc(size_t sz, void *arg) |
src_form |= MANDOC_SRC; |
{ |
sec = fn + 3; |
|
} else if (0 == strncmp("cat", fn, 3)) { |
|
src_form |= MANDOC_FORM; |
|
sec = fn + 3; |
|
} else { |
|
if (warnings) fprintf(stderr, |
|
"%s/%s: bad section\n", |
|
dir, fn); |
|
if (use_all) |
|
sec = fn; |
|
else |
|
continue; |
|
} |
|
} else if ('\0' == *arch) { |
|
if (NULL != strchr(fn, '.')) { |
|
if (warnings) fprintf(stderr, |
|
"%s/%s: bad architecture\n", |
|
dir, fn); |
|
if (0 == use_all) |
|
continue; |
|
} |
|
arch = fn; |
|
} else { |
|
if (warnings) fprintf(stderr, "%s/%s: " |
|
"excessive subdirectory\n", dir, fn); |
|
if (0 == use_all) |
|
continue; |
|
} |
|
|
|
buf[0] = '\0'; |
return(mandoc_calloc(sz, 1)); |
strlcat(buf, dir, PATH_MAX); |
} |
strlcat(buf, "/", PATH_MAX); |
|
sz = strlcat(buf, fn, PATH_MAX); |
|
|
|
if (PATH_MAX <= sz) { |
static void * |
if (warnings) fprintf(stderr, "%s/%s: " |
hash_alloc(size_t sz, void *arg) |
"path too long\n", dir, fn); |
{ |
continue; |
|
} |
|
|
|
ofile_dirbuild(buf, sec, arch, src_form, of); |
return(mandoc_malloc(sz)); |
continue; |
} |
} |
|
|
|
#if defined(__sun) |
static void |
if (0 == S_IFREG & sb.st_mode) { |
hash_free(void *p, size_t sz, void *arg) |
#else |
{ |
if (DT_REG != dp->d_type) { |
|
#endif |
|
if (warnings) |
|
fprintf(stderr, |
|
"%s/%s: not a regular file\n", |
|
dir, fn); |
|
continue; |
|
} |
|
if (!strcmp(MANDOC_DB, fn) || !strcmp(MANDOC_IDX, fn)) |
|
continue; |
|
if ('\0' == *psec) { |
|
if (warnings) |
|
fprintf(stderr, |
|
"%s/%s: file outside section\n", |
|
dir, fn); |
|
if (0 == use_all) |
|
continue; |
|
} |
|
|
|
/* |
free(p); |
* By default, skip files where the file name suffix |
} |
* does not agree with the section directory |
|
* they are located in. |
|
*/ |
|
|
|
suffix = strrchr(fn, '.'); |
static int |
if (NULL == suffix) { |
set_basedir(const char *targetdir) |
if (warnings) |
{ |
fprintf(stderr, |
static char startdir[PATH_MAX]; |
"%s/%s: no filename suffix\n", |
static int fd; |
dir, fn); |
|
if (0 == use_all) |
|
continue; |
|
} else if ((MANDOC_SRC & src_form && |
|
strcmp(suffix + 1, psec)) || |
|
(MANDOC_FORM & src_form && |
|
strcmp(suffix + 1, "0"))) { |
|
if (warnings) |
|
fprintf(stderr, |
|
"%s/%s: wrong filename suffix\n", |
|
dir, fn); |
|
if (0 == use_all) |
|
continue; |
|
if ('0' == suffix[1]) |
|
src_form |= MANDOC_FORM; |
|
else if ('1' <= suffix[1] && '9' >= suffix[1]) |
|
src_form |= MANDOC_SRC; |
|
} |
|
|
|
/* |
/* |
* Skip formatted manuals if a source version is |
* Remember where we started by keeping a fd open to the origin |
* available. Ignore the age: it is very unlikely |
* path component: throughout this utility, we chdir() a lot to |
* that people install newer formatted base manuals |
* handle relative paths, and by doing this, we can return to |
* when they used to have source manuals before, |
* the starting point. |
* and in ports, old manuals get removed on update. |
*/ |
*/ |
if ('\0' == *startdir) { |
if (0 == use_all && MANDOC_FORM & src_form && |
if (NULL == getcwd(startdir, PATH_MAX)) { |
'\0' != *psec) { |
exitcode = (int)MANDOCLEVEL_SYSERR; |
buf[0] = '\0'; |
if (NULL != targetdir) |
strlcat(buf, dir, PATH_MAX); |
say(".", NULL); |
p = strrchr(buf, '/'); |
return(0); |
if ('\0' != *parch && NULL != p) |
|
for (p--; p > buf; p--) |
|
if ('/' == *p) |
|
break; |
|
if (NULL == p) |
|
p = buf; |
|
else |
|
p++; |
|
if (0 == strncmp("cat", p, 3)) |
|
memcpy(p, "man", 3); |
|
strlcat(buf, "/", PATH_MAX); |
|
sz = strlcat(buf, fn, PATH_MAX); |
|
if (sz >= PATH_MAX) { |
|
if (warnings) fprintf(stderr, |
|
"%s/%s: path too long\n", |
|
dir, fn); |
|
continue; |
|
} |
|
q = strrchr(buf, '.'); |
|
if (NULL != q && p < q++) { |
|
*q = '\0'; |
|
sz = strlcat(buf, psec, PATH_MAX); |
|
if (sz >= PATH_MAX) { |
|
if (warnings) fprintf(stderr, |
|
"%s/%s: path too long\n", |
|
dir, fn); |
|
continue; |
|
} |
|
if (0 == access(buf, R_OK)) |
|
continue; |
|
} |
|
} |
} |
|
if (-1 == (fd = open(startdir, O_RDONLY, 0))) { |
buf[0] = '\0'; |
exitcode = (int)MANDOCLEVEL_SYSERR; |
assert('.' == dir[0]); |
say(startdir, NULL); |
if ('/' == dir[1]) { |
return(0); |
strlcat(buf, dir + 2, PATH_MAX); |
|
strlcat(buf, "/", PATH_MAX); |
|
} |
} |
sz = strlcat(buf, fn, PATH_MAX); |
if (NULL == targetdir) |
if (sz >= PATH_MAX) { |
targetdir = startdir; |
if (warnings) fprintf(stderr, |
} else { |
"%s/%s: path too long\n", dir, fn); |
if (-1 == fd) |
continue; |
return(0); |
|
if (-1 == fchdir(fd)) { |
|
close(fd); |
|
basedir[0] = '\0'; |
|
exitcode = (int)MANDOCLEVEL_SYSERR; |
|
say(startdir, NULL); |
|
return(0); |
} |
} |
|
if (NULL == targetdir) { |
nof = mandoc_calloc(1, sizeof(struct of)); |
close(fd); |
nof->fname = mandoc_strdup(buf); |
return(1); |
nof->sec = mandoc_strdup(psec); |
|
nof->arch = mandoc_strdup(parch); |
|
nof->src_form = src_form; |
|
|
|
/* |
|
* Remember the file name without the extension, |
|
* to be used as the page title in the database. |
|
*/ |
|
|
|
if (NULL != suffix) |
|
*suffix = '\0'; |
|
nof->title = mandoc_strdup(fn); |
|
|
|
/* |
|
* Add the structure to the list. |
|
*/ |
|
|
|
if (NULL == *of) { |
|
*of = nof; |
|
(*of)->first = nof; |
|
} else { |
|
nof->first = (*of)->first; |
|
(*of)->next = nof; |
|
*of = nof; |
|
} |
} |
} |
} |
|
if (NULL == realpath(targetdir, basedir)) { |
closedir(d); |
basedir[0] = '\0'; |
|
exitcode = (int)MANDOCLEVEL_BADARG; |
|
say(targetdir, NULL); |
|
return(0); |
|
} else if (-1 == chdir(basedir)) { |
|
exitcode = (int)MANDOCLEVEL_BADARG; |
|
say("", NULL); |
|
return(0); |
|
} |
|
return(1); |
} |
} |
|
|
static void |
static void |
ofile_free(struct of *of) |
say(const char *file, const char *format, ...) |
{ |
{ |
struct of *nof; |
va_list ap; |
|
|
if (NULL != of) |
if ('\0' != *basedir) |
of = of->first; |
fprintf(stderr, "%s", basedir); |
|
if ('\0' != *basedir && '\0' != *file) |
|
fputs("//", stderr); |
|
if ('\0' != *file) |
|
fprintf(stderr, "%s", file); |
|
fputs(": ", stderr); |
|
|
while (NULL != of) { |
if (NULL == format) { |
nof = of->next; |
perror(NULL); |
free(of->fname); |
return; |
free(of->sec); |
|
free(of->arch); |
|
free(of->title); |
|
free(of); |
|
of = nof; |
|
} |
} |
|
|
|
va_start(ap, format); |
|
vfprintf(stderr, format, ap); |
|
va_end(ap); |
|
|
|
fputc('\n', stderr); |
} |
} |