version 1.49.2.11, 2014/01/05 21:30:57 |
version 1.270, 2021/11/05 17:04:10 |
|
|
/* $Id$ */ |
/* $Id$ */ |
/* |
/* |
|
* Copyright (c) 2011-2020 Ingo Schwarze <schwarze@openbsd.org> |
* Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> |
* Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> |
* Copyright (c) 2011, 2012 Ingo Schwarze <schwarze@openbsd.org> |
* Copyright (c) 2016 Ed Maste <emaste@freebsd.org> |
* |
* |
* Permission to use, copy, modify, and distribute this software for any |
* Permission to use, copy, modify, and distribute this software for any |
* purpose with or without fee is hereby granted, provided that the above |
* purpose with or without fee is hereby granted, provided that the above |
* copyright notice and this permission notice appear in all copies. |
* copyright notice and this permission notice appear in all copies. |
* |
* |
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES |
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR |
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
|
* |
|
* Implementation of the makewhatis(8) program. |
*/ |
*/ |
#ifdef HAVE_CONFIG_H |
|
#include "config.h" |
#include "config.h" |
#endif |
|
|
|
#include <sys/types.h> |
#include <sys/types.h> |
|
#include <sys/mman.h> |
|
#include <sys/stat.h> |
|
|
#include <assert.h> |
#include <assert.h> |
#include <ctype.h> |
#include <ctype.h> |
#include <dirent.h> |
#if HAVE_ERR |
|
#include <err.h> |
|
#endif |
#include <errno.h> |
#include <errno.h> |
#include <fcntl.h> |
#include <fcntl.h> |
#include <getopt.h> |
#if HAVE_FTS |
|
#include <fts.h> |
|
#else |
|
#include "compat_fts.h" |
|
#endif |
#include <limits.h> |
#include <limits.h> |
|
#if HAVE_SANDBOX_INIT |
|
#include <sandbox.h> |
|
#endif |
|
#include <stdarg.h> |
|
#include <stddef.h> |
#include <stdio.h> |
#include <stdio.h> |
#include <stdint.h> |
#include <stdint.h> |
#include <stdlib.h> |
#include <stdlib.h> |
#include <string.h> |
#include <string.h> |
#include <unistd.h> |
#include <unistd.h> |
|
|
#if defined(__APPLE__) |
#include "mandoc_aux.h" |
# include <libkern/OSByteOrder.h> |
#include "mandoc_ohash.h" |
#elif defined(__linux__) |
|
# include <endian.h> |
|
#elif defined(__sun) |
|
# include <sys/byteorder.h> |
|
# include <sys/stat.h> |
|
#else |
|
# include <sys/endian.h> |
|
#endif |
|
|
|
#if defined(__linux__) || defined(__sun) |
|
# include <db_185.h> |
|
#else |
|
# include <db.h> |
|
#endif |
|
|
|
#include "man.h" |
|
#include "mdoc.h" |
|
#include "mandoc.h" |
#include "mandoc.h" |
#include "mandocdb.h" |
#include "roff.h" |
#include "manpath.h" |
#include "mdoc.h" |
|
#include "man.h" |
|
#include "mandoc_parse.h" |
|
#include "manconf.h" |
|
#include "mansearch.h" |
|
#include "dba_array.h" |
|
#include "dba.h" |
|
|
#define MANDOC_BUFSZ BUFSIZ |
extern const char *const mansearch_keynames[]; |
#define MANDOC_SLOP 1024 |
|
|
|
#define MANDOC_SRC 0x1 |
enum op { |
#define MANDOC_FORM 0x2 |
OP_DEFAULT = 0, /* new dbs from dir list or default config */ |
|
OP_CONFFILE, /* new databases from custom config file */ |
|
OP_UPDATE, /* delete/add entries in existing database */ |
|
OP_DELETE, /* delete entries from existing database */ |
|
OP_TEST /* change no databases, report potential problems */ |
|
}; |
|
|
/* Access to the mandoc database on disk. */ |
struct str { |
|
const struct mpage *mpage; /* if set, the owning parse */ |
struct mdb { |
uint64_t mask; /* bitmask in sequence */ |
char idxn[PATH_MAX]; /* index db filename */ |
char key[]; /* rendered text */ |
char dbn[PATH_MAX]; /* keyword db filename */ |
|
DB *idx; /* index recno database */ |
|
DB *db; /* keyword btree database */ |
|
}; |
}; |
|
|
/* Stack of temporarily unused index records. */ |
struct inodev { |
|
ino_t st_ino; |
struct recs { |
dev_t st_dev; |
recno_t *stack; /* pointer to a malloc'ed array */ |
|
size_t size; /* number of allocated slots */ |
|
size_t cur; /* current number of empty records */ |
|
recno_t last; /* last record number in the index */ |
|
}; |
}; |
|
|
/* Tiny list for files. No need to bring in QUEUE. */ |
struct mpage { |
|
struct inodev inodev; /* used for hashing routine */ |
struct of { |
struct dba_array *dba; |
char *fname; /* heap-allocated */ |
char *sec; /* section from file content */ |
char *sec; |
char *arch; /* architecture from file content */ |
char *arch; |
char *title; /* title from file content */ |
char *title; |
char *desc; /* description from file content */ |
int src_form; |
struct mpage *next; /* singly linked list */ |
struct of *next; /* NULL for last one */ |
struct mlink *mlinks; /* singly linked list */ |
struct of *first; /* first in list */ |
int name_head_done; |
|
enum form form; /* format from file content */ |
}; |
}; |
|
|
/* Buffer for storing growable data. */ |
struct mlink { |
|
char file[PATH_MAX]; /* filename rel. to manpath */ |
struct buf { |
char *dsec; /* section from directory */ |
char *cp; |
char *arch; /* architecture from directory */ |
size_t len; /* current length */ |
char *name; /* name from file name (not empty) */ |
size_t size; /* total buffer size */ |
char *fsec; /* section from file name suffix */ |
|
struct mlink *next; /* singly linked list */ |
|
struct mpage *mpage; /* parent */ |
|
int gzip; /* filename has a .gz suffix */ |
|
enum form dform; /* format from directory */ |
|
enum form fform; /* format from file name suffix */ |
}; |
}; |
|
|
/* Operation we're going to perform. */ |
typedef int (*mdoc_fp)(struct mpage *, const struct roff_meta *, |
|
const struct roff_node *); |
|
|
enum op { |
struct mdoc_handler { |
OP_DEFAULT = 0, /* new dbs from dir list or default config */ |
mdoc_fp fp; /* optional handler */ |
OP_CONFFILE, /* new databases from custom config file */ |
uint64_t mask; /* set unless handler returns 0 */ |
OP_UPDATE, /* delete/add entries in existing database */ |
int taboo; /* node flags that must not be set */ |
OP_DELETE, /* delete entries from existing database */ |
|
OP_TEST /* change no databases, report potential problems */ |
|
}; |
}; |
|
|
#define MAN_ARGS DB *hash, \ |
|
struct buf *buf, \ |
|
struct buf *dbuf, \ |
|
const struct man_node *n |
|
#define MDOC_ARGS DB *hash, \ |
|
struct buf *buf, \ |
|
struct buf *dbuf, \ |
|
const struct mdoc_node *n, \ |
|
const struct mdoc_meta *m |
|
|
|
static void buf_appendmdoc(struct buf *, |
int mandocdb(int, char *[]); |
const struct mdoc_node *, int); |
|
static void buf_append(struct buf *, const char *); |
|
static void buf_appendb(struct buf *, |
|
const void *, size_t); |
|
static void dbt_put(DB *, const char *, DBT *, DBT *); |
|
static void hash_put(DB *, const struct buf *, uint64_t); |
|
static void hash_reset(DB **); |
|
static void index_merge(const struct of *, struct mparse *, |
|
struct buf *, struct buf *, DB *, |
|
struct mdb *, struct recs *); |
|
static void index_prune(const struct of *, struct mdb *, |
|
struct recs *); |
|
static void ofile_argbuild(int, char *[], struct of **, |
|
const char *); |
|
static void ofile_dirbuild(const char *, const char *, |
|
const char *, int, struct of **); |
|
static void ofile_free(struct of *); |
|
static void pformatted(DB *, struct buf *, |
|
struct buf *, const struct of *); |
|
static int pman_node(MAN_ARGS); |
|
static void pmdoc_node(MDOC_ARGS); |
|
static int pmdoc_head(MDOC_ARGS); |
|
static int pmdoc_body(MDOC_ARGS); |
|
static int pmdoc_Fd(MDOC_ARGS); |
|
static int pmdoc_In(MDOC_ARGS); |
|
static int pmdoc_Fn(MDOC_ARGS); |
|
static int pmdoc_Nd(MDOC_ARGS); |
|
static int pmdoc_Nm(MDOC_ARGS); |
|
static int pmdoc_Sh(MDOC_ARGS); |
|
static int pmdoc_St(MDOC_ARGS); |
|
static int pmdoc_Xr(MDOC_ARGS); |
|
|
|
#define MDOCF_CHILD 0x01 /* Automatically index child nodes. */ |
static void dbadd(struct dba *, struct mpage *); |
|
static void dbadd_mlink(const struct mlink *); |
|
static void dbprune(struct dba *); |
|
static void dbwrite(struct dba *); |
|
static void filescan(const char *); |
|
#if HAVE_FTS_COMPARE_CONST |
|
static int fts_compare(const FTSENT *const *, const FTSENT *const *); |
|
#else |
|
static int fts_compare(const FTSENT **, const FTSENT **); |
|
#endif |
|
static void mlink_add(struct mlink *, const struct stat *); |
|
static void mlink_check(struct mpage *, struct mlink *); |
|
static void mlink_free(struct mlink *); |
|
static void mlinks_undupe(struct mpage *); |
|
static void mpages_free(void); |
|
static void mpages_merge(struct dba *, struct mparse *); |
|
static void parse_cat(struct mpage *, int); |
|
static void parse_man(struct mpage *, const struct roff_meta *, |
|
const struct roff_node *); |
|
static void parse_mdoc(struct mpage *, const struct roff_meta *, |
|
const struct roff_node *); |
|
static int parse_mdoc_head(struct mpage *, const struct roff_meta *, |
|
const struct roff_node *); |
|
static int parse_mdoc_Fa(struct mpage *, const struct roff_meta *, |
|
const struct roff_node *); |
|
static int parse_mdoc_Fd(struct mpage *, const struct roff_meta *, |
|
const struct roff_node *); |
|
static void parse_mdoc_fname(struct mpage *, const struct roff_node *); |
|
static int parse_mdoc_Fn(struct mpage *, const struct roff_meta *, |
|
const struct roff_node *); |
|
static int parse_mdoc_Fo(struct mpage *, const struct roff_meta *, |
|
const struct roff_node *); |
|
static int parse_mdoc_Nd(struct mpage *, const struct roff_meta *, |
|
const struct roff_node *); |
|
static int parse_mdoc_Nm(struct mpage *, const struct roff_meta *, |
|
const struct roff_node *); |
|
static int parse_mdoc_Sh(struct mpage *, const struct roff_meta *, |
|
const struct roff_node *); |
|
static int parse_mdoc_Va(struct mpage *, const struct roff_meta *, |
|
const struct roff_node *); |
|
static int parse_mdoc_Xr(struct mpage *, const struct roff_meta *, |
|
const struct roff_node *); |
|
static void putkey(const struct mpage *, char *, uint64_t); |
|
static void putkeys(const struct mpage *, char *, size_t, uint64_t); |
|
static void putmdockey(const struct mpage *, |
|
const struct roff_node *, uint64_t, int); |
|
#ifdef READ_ALLOWED_PATH |
|
static int read_allowed(const char *); |
|
#endif |
|
static int render_string(char **, size_t *); |
|
static void say(const char *, const char *, ...) |
|
__attribute__((__format__ (__printf__, 2, 3))); |
|
static int set_basedir(const char *, int); |
|
static int treescan(void); |
|
static size_t utf8(unsigned int, char [7]); |
|
|
struct mdoc_handler { |
static int nodb; /* no database changes */ |
int (*fp)(MDOC_ARGS); /* Optional handler. */ |
static int mparse_options; /* abort the parse early */ |
uint64_t mask; /* Set unless handler returns 0. */ |
static int use_all; /* use all found files */ |
int flags; /* For use by pmdoc_node. */ |
static int debug; /* print what we're doing */ |
}; |
static int warnings; /* warn about crap */ |
|
static int write_utf8; /* write UTF-8 output; else ASCII */ |
|
static int exitcode; /* to be returned by main */ |
|
static enum op op; /* operational mode */ |
|
static char basedir[PATH_MAX]; /* current base directory */ |
|
static size_t basedir_len; /* strlen(basedir) */ |
|
static struct mpage *mpage_head; /* list of distinct manual pages */ |
|
static struct ohash mpages; /* table of distinct manual pages */ |
|
static struct ohash mlinks; /* table of directory entries */ |
|
static struct ohash names; /* table of all names */ |
|
static struct ohash strings; /* table of all strings */ |
|
static uint64_t name_mask; |
|
|
static const struct mdoc_handler mdocs[MDOC_MAX] = { |
static const struct mdoc_handler mdoc_handlers[MDOC_MAX - MDOC_Dd] = { |
{ NULL, 0, 0 }, /* Ap */ |
{ NULL, 0, NODE_NOPRT }, /* Dd */ |
{ NULL, 0, 0 }, /* Dd */ |
{ NULL, 0, NODE_NOPRT }, /* Dt */ |
{ NULL, 0, 0 }, /* Dt */ |
{ NULL, 0, NODE_NOPRT }, /* Os */ |
{ NULL, 0, 0 }, /* Os */ |
{ parse_mdoc_Sh, TYPE_Sh, 0 }, /* Sh */ |
{ pmdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */ |
{ parse_mdoc_head, TYPE_Ss, 0 }, /* Ss */ |
{ pmdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */ |
|
{ NULL, 0, 0 }, /* Pp */ |
{ NULL, 0, 0 }, /* Pp */ |
{ NULL, 0, 0 }, /* D1 */ |
{ NULL, 0, 0 }, /* D1 */ |
{ NULL, 0, 0 }, /* Dl */ |
{ NULL, 0, 0 }, /* Dl */ |
Line 178 static const struct mdoc_handler mdocs[MDOC_MAX] = { |
|
Line 207 static const struct mdoc_handler mdocs[MDOC_MAX] = { |
|
{ NULL, 0, 0 }, /* El */ |
{ NULL, 0, 0 }, /* El */ |
{ NULL, 0, 0 }, /* It */ |
{ NULL, 0, 0 }, /* It */ |
{ NULL, 0, 0 }, /* Ad */ |
{ NULL, 0, 0 }, /* Ad */ |
{ NULL, TYPE_An, MDOCF_CHILD }, /* An */ |
{ NULL, TYPE_An, 0 }, /* An */ |
{ NULL, TYPE_Ar, MDOCF_CHILD }, /* Ar */ |
{ NULL, 0, 0 }, /* Ap */ |
{ NULL, TYPE_Cd, MDOCF_CHILD }, /* Cd */ |
{ NULL, TYPE_Ar, 0 }, /* Ar */ |
{ NULL, TYPE_Cm, MDOCF_CHILD }, /* Cm */ |
{ NULL, TYPE_Cd, 0 }, /* Cd */ |
{ NULL, TYPE_Dv, MDOCF_CHILD }, /* Dv */ |
{ NULL, TYPE_Cm, 0 }, /* Cm */ |
{ NULL, TYPE_Er, MDOCF_CHILD }, /* Er */ |
{ NULL, TYPE_Dv, 0 }, /* Dv */ |
{ NULL, TYPE_Ev, MDOCF_CHILD }, /* Ev */ |
{ NULL, TYPE_Er, 0 }, /* Er */ |
|
{ NULL, TYPE_Ev, 0 }, /* Ev */ |
{ NULL, 0, 0 }, /* Ex */ |
{ NULL, 0, 0 }, /* Ex */ |
{ NULL, TYPE_Fa, MDOCF_CHILD }, /* Fa */ |
{ parse_mdoc_Fa, 0, 0 }, /* Fa */ |
{ pmdoc_Fd, TYPE_In, 0 }, /* Fd */ |
{ parse_mdoc_Fd, 0, 0 }, /* Fd */ |
{ NULL, TYPE_Fl, MDOCF_CHILD }, /* Fl */ |
{ NULL, TYPE_Fl, 0 }, /* Fl */ |
{ pmdoc_Fn, 0, 0 }, /* Fn */ |
{ parse_mdoc_Fn, 0, 0 }, /* Fn */ |
{ NULL, TYPE_Ft, MDOCF_CHILD }, /* Ft */ |
{ NULL, TYPE_Ft | TYPE_Vt, 0 }, /* Ft */ |
{ NULL, TYPE_Ic, MDOCF_CHILD }, /* Ic */ |
{ NULL, TYPE_Ic, 0 }, /* Ic */ |
{ pmdoc_In, TYPE_In, 0 }, /* In */ |
{ NULL, TYPE_In, 0 }, /* In */ |
{ NULL, TYPE_Li, MDOCF_CHILD }, /* Li */ |
{ NULL, TYPE_Li, 0 }, /* Li */ |
{ pmdoc_Nd, TYPE_Nd, MDOCF_CHILD }, /* Nd */ |
{ parse_mdoc_Nd, 0, 0 }, /* Nd */ |
{ pmdoc_Nm, TYPE_Nm, MDOCF_CHILD }, /* Nm */ |
{ parse_mdoc_Nm, 0, 0 }, /* Nm */ |
{ NULL, 0, 0 }, /* Op */ |
{ NULL, 0, 0 }, /* Op */ |
{ NULL, 0, 0 }, /* Ot */ |
{ NULL, 0, 0 }, /* Ot */ |
{ NULL, TYPE_Pa, MDOCF_CHILD }, /* Pa */ |
{ NULL, TYPE_Pa, NODE_NOSRC }, /* Pa */ |
{ NULL, 0, 0 }, /* Rv */ |
{ NULL, 0, 0 }, /* Rv */ |
{ pmdoc_St, TYPE_St, 0 }, /* St */ |
{ NULL, TYPE_St, 0 }, /* St */ |
{ NULL, TYPE_Va, MDOCF_CHILD }, /* Va */ |
{ parse_mdoc_Va, TYPE_Va, 0 }, /* Va */ |
{ pmdoc_body, TYPE_Va, MDOCF_CHILD }, /* Vt */ |
{ parse_mdoc_Va, TYPE_Vt, 0 }, /* Vt */ |
{ pmdoc_Xr, TYPE_Xr, 0 }, /* Xr */ |
{ parse_mdoc_Xr, 0, 0 }, /* Xr */ |
{ NULL, 0, 0 }, /* %A */ |
{ NULL, 0, 0 }, /* %A */ |
{ NULL, 0, 0 }, /* %B */ |
{ NULL, 0, 0 }, /* %B */ |
{ NULL, 0, 0 }, /* %D */ |
{ NULL, 0, 0 }, /* %D */ |
Line 218 static const struct mdoc_handler mdocs[MDOC_MAX] = { |
|
Line 248 static const struct mdoc_handler mdocs[MDOC_MAX] = { |
|
{ NULL, 0, 0 }, /* Ac */ |
{ NULL, 0, 0 }, /* Ac */ |
{ NULL, 0, 0 }, /* Ao */ |
{ NULL, 0, 0 }, /* Ao */ |
{ NULL, 0, 0 }, /* Aq */ |
{ NULL, 0, 0 }, /* Aq */ |
{ NULL, TYPE_At, MDOCF_CHILD }, /* At */ |
{ NULL, TYPE_At, 0 }, /* At */ |
{ NULL, 0, 0 }, /* Bc */ |
{ NULL, 0, 0 }, /* Bc */ |
{ NULL, 0, 0 }, /* Bf */ |
{ NULL, 0, 0 }, /* Bf */ |
{ NULL, 0, 0 }, /* Bo */ |
{ NULL, 0, 0 }, /* Bo */ |
{ NULL, 0, 0 }, /* Bq */ |
{ NULL, 0, 0 }, /* Bq */ |
{ NULL, TYPE_Bsx, MDOCF_CHILD }, /* Bsx */ |
{ NULL, TYPE_Bsx, NODE_NOSRC }, /* Bsx */ |
{ NULL, TYPE_Bx, MDOCF_CHILD }, /* Bx */ |
{ NULL, TYPE_Bx, NODE_NOSRC }, /* Bx */ |
{ NULL, 0, 0 }, /* Db */ |
{ NULL, 0, 0 }, /* Db */ |
{ NULL, 0, 0 }, /* Dc */ |
{ NULL, 0, 0 }, /* Dc */ |
{ NULL, 0, 0 }, /* Do */ |
{ NULL, 0, 0 }, /* Do */ |
{ NULL, 0, 0 }, /* Dq */ |
{ NULL, 0, 0 }, /* Dq */ |
{ NULL, 0, 0 }, /* Ec */ |
{ NULL, 0, 0 }, /* Ec */ |
{ NULL, 0, 0 }, /* Ef */ |
{ NULL, 0, 0 }, /* Ef */ |
{ NULL, TYPE_Em, MDOCF_CHILD }, /* Em */ |
{ NULL, TYPE_Em, 0 }, /* Em */ |
{ NULL, 0, 0 }, /* Eo */ |
{ NULL, 0, 0 }, /* Eo */ |
{ NULL, TYPE_Fx, MDOCF_CHILD }, /* Fx */ |
{ NULL, TYPE_Fx, NODE_NOSRC }, /* Fx */ |
{ NULL, TYPE_Ms, MDOCF_CHILD }, /* Ms */ |
{ NULL, TYPE_Ms, 0 }, /* Ms */ |
{ NULL, 0, 0 }, /* No */ |
{ NULL, 0, 0 }, /* No */ |
{ NULL, 0, 0 }, /* Ns */ |
{ NULL, 0, 0 }, /* Ns */ |
{ NULL, TYPE_Nx, MDOCF_CHILD }, /* Nx */ |
{ NULL, TYPE_Nx, NODE_NOSRC }, /* Nx */ |
{ NULL, TYPE_Ox, MDOCF_CHILD }, /* Ox */ |
{ NULL, TYPE_Ox, NODE_NOSRC }, /* Ox */ |
{ NULL, 0, 0 }, /* Pc */ |
{ NULL, 0, 0 }, /* Pc */ |
{ NULL, 0, 0 }, /* Pf */ |
{ NULL, 0, 0 }, /* Pf */ |
{ NULL, 0, 0 }, /* Po */ |
{ NULL, 0, 0 }, /* Po */ |
Line 254 static const struct mdoc_handler mdocs[MDOC_MAX] = { |
|
Line 284 static const struct mdoc_handler mdocs[MDOC_MAX] = { |
|
{ NULL, 0, 0 }, /* Sq */ |
{ NULL, 0, 0 }, /* Sq */ |
{ NULL, 0, 0 }, /* Sm */ |
{ NULL, 0, 0 }, /* Sm */ |
{ NULL, 0, 0 }, /* Sx */ |
{ NULL, 0, 0 }, /* Sx */ |
{ NULL, TYPE_Sy, MDOCF_CHILD }, /* Sy */ |
{ NULL, TYPE_Sy, 0 }, /* Sy */ |
{ NULL, TYPE_Tn, MDOCF_CHILD }, /* Tn */ |
{ NULL, TYPE_Tn, 0 }, /* Tn */ |
{ NULL, 0, 0 }, /* Ux */ |
{ NULL, 0, NODE_NOSRC }, /* Ux */ |
{ NULL, 0, 0 }, /* Xc */ |
{ NULL, 0, 0 }, /* Xc */ |
{ NULL, 0, 0 }, /* Xo */ |
{ NULL, 0, 0 }, /* Xo */ |
{ pmdoc_head, TYPE_Fn, 0 }, /* Fo */ |
{ parse_mdoc_Fo, 0, 0 }, /* Fo */ |
{ NULL, 0, 0 }, /* Fc */ |
{ NULL, 0, 0 }, /* Fc */ |
{ NULL, 0, 0 }, /* Oo */ |
{ NULL, 0, 0 }, /* Oo */ |
{ NULL, 0, 0 }, /* Oc */ |
{ NULL, 0, 0 }, /* Oc */ |
Line 269 static const struct mdoc_handler mdocs[MDOC_MAX] = { |
|
Line 299 static const struct mdoc_handler mdocs[MDOC_MAX] = { |
|
{ NULL, 0, 0 }, /* Hf */ |
{ NULL, 0, 0 }, /* Hf */ |
{ NULL, 0, 0 }, /* Fr */ |
{ NULL, 0, 0 }, /* Fr */ |
{ NULL, 0, 0 }, /* Ud */ |
{ NULL, 0, 0 }, /* Ud */ |
{ NULL, TYPE_Lb, MDOCF_CHILD }, /* Lb */ |
{ NULL, TYPE_Lb, NODE_NOSRC }, /* Lb */ |
{ NULL, 0, 0 }, /* Lp */ |
{ NULL, 0, 0 }, /* Lp */ |
{ NULL, TYPE_Lk, MDOCF_CHILD }, /* Lk */ |
{ NULL, TYPE_Lk, 0 }, /* Lk */ |
{ NULL, TYPE_Mt, MDOCF_CHILD }, /* Mt */ |
{ NULL, TYPE_Mt, NODE_NOSRC }, /* Mt */ |
{ NULL, 0, 0 }, /* Brq */ |
{ NULL, 0, 0 }, /* Brq */ |
{ NULL, 0, 0 }, /* Bro */ |
{ NULL, 0, 0 }, /* Bro */ |
{ NULL, 0, 0 }, /* Brc */ |
{ NULL, 0, 0 }, /* Brc */ |
{ NULL, 0, 0 }, /* %C */ |
{ NULL, 0, 0 }, /* %C */ |
{ NULL, 0, 0 }, /* Es */ |
{ NULL, 0, 0 }, /* Es */ |
{ NULL, 0, 0 }, /* En */ |
{ NULL, 0, 0 }, /* En */ |
{ NULL, TYPE_Dx, MDOCF_CHILD }, /* Dx */ |
{ NULL, TYPE_Dx, NODE_NOSRC }, /* Dx */ |
{ NULL, 0, 0 }, /* %Q */ |
{ NULL, 0, 0 }, /* %Q */ |
{ NULL, 0, 0 }, /* br */ |
|
{ NULL, 0, 0 }, /* sp */ |
|
{ NULL, 0, 0 }, /* %U */ |
{ NULL, 0, 0 }, /* %U */ |
{ NULL, 0, 0 }, /* Ta */ |
{ NULL, 0, 0 }, /* Ta */ |
}; |
}; |
|
|
static const char *progname; |
|
static int quick; /* abort the parse early */ |
|
static int use_all; /* Use all directories and files. */ |
|
static int verb; /* Output verbosity level. */ |
|
static int warnings; /* Potential problems in manuals. */ |
|
|
|
int |
int |
main(int argc, char *argv[]) |
mandocdb(int argc, char *argv[]) |
{ |
{ |
struct mparse *mp; /* parse sequence */ |
struct manconf conf; |
struct manpaths dirs; |
struct mparse *mp; |
struct mdb mdb; |
struct dba *dba; |
struct recs recs; |
const char *path_arg, *progname; |
enum op op; /* current operation */ |
size_t j, sz; |
const char *dir; |
int ch, i; |
char *cp; |
|
char pbuf[PATH_MAX]; |
|
int ch, i, flags; |
|
DB *hash; /* temporary keyword hashtable */ |
|
BTREEINFO info; /* btree configuration */ |
|
size_t sz1, sz2, ipath; |
|
struct buf buf, /* keyword buffer */ |
|
dbuf; /* description buffer */ |
|
struct of *of; /* list of files for processing */ |
|
extern int optind; |
|
extern char *optarg; |
|
|
|
progname = strrchr(argv[0], '/'); |
#if HAVE_PLEDGE |
if (progname == NULL) |
if (pledge("stdio rpath wpath cpath", NULL) == -1) { |
progname = argv[0]; |
warn("pledge"); |
else |
return (int)MANDOCLEVEL_SYSERR; |
++progname; |
} |
|
#endif |
|
|
memset(&dirs, 0, sizeof(struct manpaths)); |
#if HAVE_SANDBOX_INIT |
memset(&mdb, 0, sizeof(struct mdb)); |
if (sandbox_init(kSBXProfileNoInternet, SANDBOX_NAMED, NULL) == -1) { |
memset(&recs, 0, sizeof(struct recs)); |
warnx("sandbox_init"); |
|
return (int)MANDOCLEVEL_SYSERR; |
|
} |
|
#endif |
|
|
of = NULL; |
memset(&conf, 0, sizeof(conf)); |
mp = NULL; |
|
hash = NULL; |
/* |
|
* We accept a few different invocations. |
|
* The CHECKOP macro makes sure that invocation styles don't |
|
* clobber each other. |
|
*/ |
|
#define CHECKOP(_op, _ch) do \ |
|
if ((_op) != OP_DEFAULT) { \ |
|
warnx("-%c: Conflicting option", (_ch)); \ |
|
goto usage; \ |
|
} while (/*CONSTCOND*/0) |
|
|
|
mparse_options = MPARSE_VALIDATE; |
|
path_arg = NULL; |
op = OP_DEFAULT; |
op = OP_DEFAULT; |
dir = NULL; |
|
|
|
while (-1 != (ch = getopt(argc, argv, "aC:d:Qtu:vW"))) |
while ((ch = getopt(argc, argv, "aC:Dd:npQT:tu:v")) != -1) |
switch (ch) { |
switch (ch) { |
case ('a'): |
case 'a': |
use_all = 1; |
use_all = 1; |
break; |
break; |
case ('C'): |
case 'C': |
if (op) { |
CHECKOP(op, ch); |
fprintf(stderr, |
path_arg = optarg; |
"-C: conflicting options\n"); |
|
goto usage; |
|
} |
|
dir = optarg; |
|
op = OP_CONFFILE; |
op = OP_CONFFILE; |
break; |
break; |
case ('d'): |
case 'D': |
if (op) { |
debug++; |
fprintf(stderr, |
break; |
"-d: conflicting options\n"); |
case 'd': |
goto usage; |
CHECKOP(op, ch); |
} |
path_arg = optarg; |
dir = optarg; |
|
op = OP_UPDATE; |
op = OP_UPDATE; |
break; |
break; |
case ('Q'): |
case 'n': |
quick = 1; |
nodb = 1; |
break; |
break; |
case ('t'): |
case 'p': |
dup2(STDOUT_FILENO, STDERR_FILENO); |
|
if (op) { |
|
fprintf(stderr, |
|
"-t: conflicting options\n"); |
|
goto usage; |
|
} |
|
op = OP_TEST; |
|
use_all = 1; |
|
warnings = 1; |
warnings = 1; |
break; |
break; |
case ('u'): |
case 'Q': |
if (op) { |
mparse_options |= MPARSE_QUICK; |
fprintf(stderr, |
break; |
"-u: conflicting options\n"); |
case 'T': |
|
if (strcmp(optarg, "utf8") != 0) { |
|
warnx("-T%s: Unsupported output format", |
|
optarg); |
goto usage; |
goto usage; |
} |
} |
dir = optarg; |
write_utf8 = 1; |
op = OP_DELETE; |
|
break; |
break; |
case ('v'): |
case 't': |
verb++; |
CHECKOP(op, ch); |
|
dup2(STDOUT_FILENO, STDERR_FILENO); |
|
op = OP_TEST; |
|
nodb = warnings = 1; |
break; |
break; |
case ('W'): |
case 'u': |
warnings = 1; |
CHECKOP(op, ch); |
|
path_arg = optarg; |
|
op = OP_DELETE; |
break; |
break; |
|
case 'v': |
|
/* Compatibility with espie@'s makewhatis. */ |
|
break; |
default: |
default: |
goto usage; |
goto usage; |
} |
} |
Line 389 main(int argc, char *argv[]) |
|
Line 413 main(int argc, char *argv[]) |
|
argc -= optind; |
argc -= optind; |
argv += optind; |
argv += optind; |
|
|
if (OP_CONFFILE == op && argc > 0) { |
#if HAVE_PLEDGE |
fprintf(stderr, "-C: too many arguments\n"); |
if (nodb) { |
goto usage; |
if (pledge("stdio rpath", NULL) == -1) { |
|
warn("pledge"); |
|
return (int)MANDOCLEVEL_SYSERR; |
|
} |
} |
} |
|
#endif |
|
|
memset(&info, 0, sizeof(BTREEINFO)); |
if (op == OP_CONFFILE && argc > 0) { |
info.lorder = 4321; |
warnx("-C: Too many arguments"); |
info.flags = R_DUP; |
goto usage; |
|
|
mp = mparse_alloc(MPARSE_AUTO, |
|
MANDOCLEVEL_FATAL, NULL, NULL, quick); |
|
|
|
memset(&buf, 0, sizeof(struct buf)); |
|
memset(&dbuf, 0, sizeof(struct buf)); |
|
|
|
buf.size = dbuf.size = MANDOC_BUFSZ; |
|
|
|
buf.cp = mandoc_malloc(buf.size); |
|
dbuf.cp = mandoc_malloc(dbuf.size); |
|
|
|
if (OP_TEST == op) { |
|
ofile_argbuild(argc, argv, &of, NULL); |
|
if (NULL == of) |
|
goto out; |
|
index_merge(of, mp, &dbuf, &buf, hash, &mdb, &recs); |
|
goto out; |
|
} |
} |
|
|
if (OP_UPDATE == op || OP_DELETE == op) { |
exitcode = (int)MANDOCLEVEL_OK; |
if (NULL == realpath(dir, pbuf)) { |
mchars_alloc(); |
perror(dir); |
mp = mparse_alloc(mparse_options, MANDOC_OS_OTHER, NULL); |
exit((int)MANDOCLEVEL_BADARG); |
mandoc_ohash_init(&mpages, 6, offsetof(struct mpage, inodev)); |
} |
mandoc_ohash_init(&mlinks, 6, offsetof(struct mlink, file)); |
if (strlcat(pbuf, "/", PATH_MAX) >= PATH_MAX) { |
|
fprintf(stderr, "%s: path too long\n", pbuf); |
|
exit((int)MANDOCLEVEL_BADARG); |
|
} |
|
|
|
strlcat(mdb.dbn, pbuf, PATH_MAX); |
if (op == OP_UPDATE || op == OP_DELETE || op == OP_TEST) { |
sz1 = strlcat(mdb.dbn, MANDOC_DB, PATH_MAX); |
|
|
|
strlcat(mdb.idxn, pbuf, PATH_MAX); |
|
sz2 = strlcat(mdb.idxn, MANDOC_IDX, PATH_MAX); |
|
|
|
if (sz1 >= PATH_MAX || sz2 >= PATH_MAX) { |
|
fprintf(stderr, "%s: path too long\n", mdb.idxn); |
|
exit((int)MANDOCLEVEL_BADARG); |
|
} |
|
|
|
flags = O_CREAT | O_RDWR; |
|
mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info); |
|
mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL); |
|
|
|
if (NULL == mdb.db) { |
|
perror(mdb.dbn); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} else if (NULL == mdb.idx) { |
|
perror(mdb.idxn); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
|
|
|
ofile_argbuild(argc, argv, &of, pbuf); |
|
|
|
if (NULL == of) |
|
goto out; |
|
|
|
index_prune(of, &mdb, &recs); |
|
|
|
/* |
/* |
* Go to the root of the respective manual tree. |
* Most of these deal with a specific directory. |
* This must work or no manuals may be found (they're |
* Jump into that directory first. |
* indexed relative to the root). |
|
*/ |
*/ |
|
if (op != OP_TEST && set_basedir(path_arg, 1) == 0) |
|
goto out; |
|
|
if (OP_UPDATE == op) { |
dba = nodb ? dba_new(128) : dba_read(MANDOC_DB); |
if (-1 == chdir(dir)) { |
if (dba != NULL) { |
perror(dir); |
/* |
exit((int)MANDOCLEVEL_SYSERR); |
* The existing database is usable. Process |
} |
* all files specified on the command-line. |
index_merge(of, mp, &dbuf, &buf, hash, |
*/ |
&mdb, &recs); |
use_all = 1; |
} |
for (i = 0; i < argc; i++) |
|
filescan(argv[i]); |
goto out; |
if (nodb == 0) |
} |
dbprune(dba); |
|
} else { |
/* |
/* Database missing or corrupt. */ |
* Configure the directories we're going to scan. |
if (op != OP_UPDATE || errno != ENOENT) |
* If we have command-line arguments, use them. |
say(MANDOC_DB, "%s: Automatically recreating" |
* If not, we use man(1)'s method (see mandocdb.8). |
" from scratch", strerror(errno)); |
*/ |
exitcode = (int)MANDOCLEVEL_OK; |
|
op = OP_DEFAULT; |
if (argc > 0) { |
if (treescan() == 0) |
dirs.paths = mandoc_calloc(argc, sizeof(char *)); |
|
dirs.sz = argc; |
|
for (i = 0; i < argc; i++) { |
|
if (NULL == (cp = realpath(argv[i], pbuf))) { |
|
perror(argv[i]); |
|
goto out; |
goto out; |
} |
dba = dba_new(128); |
dirs.paths[i] = mandoc_strdup(cp); |
|
} |
} |
} else |
if (op != OP_DELETE) |
manpath_parse(&dirs, dir, NULL, NULL); |
mpages_merge(dba, mp); |
|
if (nodb == 0) |
for (ipath = 0; ipath < dirs.sz; ipath++) { |
dbwrite(dba); |
|
dba_free(dba); |
|
} else { |
/* |
/* |
* Go to the root of the respective manual tree. |
* If we have arguments, use them as our manpaths. |
* This must work or no manuals may be found: |
* If we don't, use man.conf(5). |
* They are indexed relative to the root. |
|
*/ |
*/ |
|
if (argc > 0) { |
|
conf.manpath.paths = mandoc_reallocarray(NULL, |
|
argc, sizeof(char *)); |
|
conf.manpath.sz = (size_t)argc; |
|
for (i = 0; i < argc; i++) |
|
conf.manpath.paths[i] = mandoc_strdup(argv[i]); |
|
} else |
|
manconf_parse(&conf, path_arg, NULL, NULL); |
|
|
if (-1 == chdir(dirs.paths[ipath])) { |
if (conf.manpath.sz == 0) { |
perror(dirs.paths[ipath]); |
exitcode = (int)MANDOCLEVEL_BADARG; |
exit((int)MANDOCLEVEL_SYSERR); |
say("", "Empty manpath"); |
} |
} |
|
|
/* Create a new database in two temporary files. */ |
|
|
|
flags = O_CREAT | O_EXCL | O_RDWR; |
|
while (NULL == mdb.db) { |
|
strlcpy(mdb.dbn, MANDOC_DB, PATH_MAX); |
|
strlcat(mdb.dbn, ".XXXXXXXXXX", PATH_MAX); |
|
if (NULL == mktemp(mdb.dbn)) { |
|
perror(mdb.dbn); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
|
mdb.db = dbopen(mdb.dbn, flags, 0644, |
|
DB_BTREE, &info); |
|
if (NULL == mdb.db && EEXIST != errno) { |
|
perror(mdb.dbn); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
|
} |
|
while (NULL == mdb.idx) { |
|
strlcpy(mdb.idxn, MANDOC_IDX, PATH_MAX); |
|
strlcat(mdb.idxn, ".XXXXXXXXXX", PATH_MAX); |
|
if (NULL == mktemp(mdb.idxn)) { |
|
perror(mdb.idxn); |
|
unlink(mdb.dbn); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
|
mdb.idx = dbopen(mdb.idxn, flags, 0644, |
|
DB_RECNO, NULL); |
|
if (NULL == mdb.idx && EEXIST != errno) { |
|
perror(mdb.idxn); |
|
unlink(mdb.dbn); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
|
} |
|
|
|
/* |
/* |
* Search for manuals and fill the new database. |
* First scan the tree rooted at a base directory, then |
|
* build a new database and finally move it into place. |
|
* Ignore zero-length directories and strip trailing |
|
* slashes. |
*/ |
*/ |
|
for (j = 0; j < conf.manpath.sz; j++) { |
|
sz = strlen(conf.manpath.paths[j]); |
|
if (sz && conf.manpath.paths[j][sz - 1] == '/') |
|
conf.manpath.paths[j][--sz] = '\0'; |
|
if (sz == 0) |
|
continue; |
|
|
ofile_dirbuild(".", "", "", 0, &of); |
if (j) { |
|
mandoc_ohash_init(&mpages, 6, |
|
offsetof(struct mpage, inodev)); |
|
mandoc_ohash_init(&mlinks, 6, |
|
offsetof(struct mlink, file)); |
|
} |
|
|
if (NULL != of) { |
if (set_basedir(conf.manpath.paths[j], argc > 0) == 0) |
index_merge(of, mp, &dbuf, &buf, hash, |
continue; |
&mdb, &recs); |
if (treescan() == 0) |
ofile_free(of); |
continue; |
of = NULL; |
dba = dba_new(128); |
} |
mpages_merge(dba, mp); |
|
if (nodb == 0) |
|
dbwrite(dba); |
|
dba_free(dba); |
|
|
(*mdb.db->close)(mdb.db); |
if (j + 1 < conf.manpath.sz) { |
(*mdb.idx->close)(mdb.idx); |
mpages_free(); |
mdb.db = NULL; |
ohash_delete(&mpages); |
mdb.idx = NULL; |
ohash_delete(&mlinks); |
|
} |
/* |
|
* Replace the old database with the new one. |
|
* This is not perfectly atomic, |
|
* but i cannot think of a better way. |
|
*/ |
|
|
|
if (-1 == rename(mdb.dbn, MANDOC_DB)) { |
|
perror(MANDOC_DB); |
|
unlink(mdb.dbn); |
|
unlink(mdb.idxn); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
} |
if (-1 == rename(mdb.idxn, MANDOC_IDX)) { |
|
perror(MANDOC_IDX); |
|
unlink(MANDOC_DB); |
|
unlink(MANDOC_IDX); |
|
unlink(mdb.idxn); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
|
} |
} |
|
|
out: |
out: |
if (mdb.db) |
manconf_free(&conf); |
(*mdb.db->close)(mdb.db); |
mparse_free(mp); |
if (mdb.idx) |
mchars_free(); |
(*mdb.idx->close)(mdb.idx); |
mpages_free(); |
if (hash) |
ohash_delete(&mpages); |
(*hash->close)(hash); |
ohash_delete(&mlinks); |
if (mp) |
#if DEBUG_MEMORY |
mparse_free(mp); |
mandoc_d_finish(); |
|
#endif |
manpath_free(&dirs); |
return exitcode; |
ofile_free(of); |
|
free(buf.cp); |
|
free(dbuf.cp); |
|
free(recs.stack); |
|
|
|
return(MANDOCLEVEL_OK); |
|
|
|
usage: |
usage: |
fprintf(stderr, |
progname = getprogname(); |
"usage: %s [-aQvvv] [-C file] | dir ... | -t file ...\n" |
fprintf(stderr, "usage: %s [-aDnpQ] [-C file] [-Tutf8]\n" |
" -d dir [file ...] | " |
" %s [-aDnpQ] [-Tutf8] dir ...\n" |
"-u dir [file ...]\n", |
" %s [-DnpQ] [-Tutf8] -d dir [file ...]\n" |
progname); |
" %s [-Dnp] -u dir [file ...]\n" |
|
" %s [-Q] -t file ...\n", |
|
progname, progname, progname, progname, progname); |
|
|
return((int)MANDOCLEVEL_BADARG); |
return (int)MANDOCLEVEL_BADARG; |
} |
} |
|
|
void |
/* |
index_merge(const struct of *of, struct mparse *mp, |
* To get a singly linked list in alpha order while inserting entries |
struct buf *dbuf, struct buf *buf, DB *hash, |
* at the beginning, process directory entries in reverse alpha order. |
struct mdb *mdb, struct recs *recs) |
*/ |
|
static int |
|
#if HAVE_FTS_COMPARE_CONST |
|
fts_compare(const FTSENT *const *a, const FTSENT *const *b) |
|
#else |
|
fts_compare(const FTSENT **a, const FTSENT **b) |
|
#endif |
{ |
{ |
recno_t rec; |
return -strcmp((*a)->fts_name, (*b)->fts_name); |
int ch, skip; |
} |
DBT key, val; |
|
DB *files; /* temporary file name table */ |
|
struct mdoc *mdoc; |
|
struct man *man; |
|
const char *fn, *msec, *march, *mtitle; |
|
char *p; |
|
uint64_t mask; |
|
size_t sv; |
|
unsigned seq; |
|
uint64_t vbuf[2]; |
|
char type; |
|
|
|
static char emptystring[] = ""; |
/* |
|
* Scan a directory tree rooted at "basedir" for manpages. |
|
* We use fts(), scanning directory parts along the way for clues to our |
|
* section and architecture. |
|
* |
|
* If use_all has been specified, grok all files. |
|
* If not, sanitise paths to the following: |
|
* |
|
* [./]man*[/<arch>]/<name>.<section> |
|
* or |
|
* [./]cat<section>[/<arch>]/<name>.0 |
|
* |
|
* TODO: accommodate for multi-language directories. |
|
*/ |
|
static int |
|
treescan(void) |
|
{ |
|
char buf[PATH_MAX]; |
|
FTS *f; |
|
FTSENT *ff; |
|
struct mlink *mlink; |
|
int gzip; |
|
enum form dform; |
|
char *dsec, *arch, *fsec, *cp; |
|
const char *path; |
|
const char *argv[2]; |
|
|
if (warnings) { |
argv[0] = "."; |
files = NULL; |
argv[1] = NULL; |
hash_reset(&files); |
|
|
f = fts_open((char * const *)argv, FTS_PHYSICAL | FTS_NOCHDIR, |
|
fts_compare); |
|
if (f == NULL) { |
|
exitcode = (int)MANDOCLEVEL_SYSERR; |
|
say("", "&fts_open"); |
|
return 0; |
} |
} |
|
|
rec = 0; |
dsec = arch = NULL; |
for (of = of->first; of; of = of->next) { |
dform = FORM_NONE; |
fn = of->fname; |
|
|
|
/* |
while ((ff = fts_read(f)) != NULL) { |
* Try interpreting the file as mdoc(7) or man(7) |
path = ff->fts_path + 2; |
* source code, unless it is already known to be |
switch (ff->fts_info) { |
* formatted. Fall back to formatted mode. |
|
*/ |
|
|
|
mparse_reset(mp); |
|
mdoc = NULL; |
|
man = NULL; |
|
|
|
if ((MANDOC_SRC & of->src_form || |
|
! (MANDOC_FORM & of->src_form)) && |
|
MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn)) |
|
mparse_result(mp, &mdoc, &man); |
|
|
|
if (NULL != mdoc) { |
|
msec = mdoc_meta(mdoc)->msec; |
|
march = mdoc_meta(mdoc)->arch; |
|
if (NULL == march) |
|
march = ""; |
|
mtitle = mdoc_meta(mdoc)->title; |
|
} else if (NULL != man) { |
|
msec = man_meta(man)->msec; |
|
march = ""; |
|
mtitle = man_meta(man)->title; |
|
} else { |
|
msec = of->sec; |
|
march = of->arch; |
|
mtitle = of->title; |
|
} |
|
|
|
/* |
/* |
* Check whether the manual section given in a file |
* Symbolic links require various sanity checks, |
* agrees with the directory where the file is located. |
* then get handled just like regular files. |
* Some manuals have suffixes like (3p) on their |
|
* section number either inside the file or in the |
|
* directory name, some are linked into more than one |
|
* section, like encrypt(1) = makekey(8). Do not skip |
|
* manuals for such reasons. |
|
*/ |
*/ |
|
case FTS_SL: |
skip = 0; |
if (realpath(path, buf) == NULL) { |
assert(of->sec); |
if (warnings) |
assert(msec); |
say(path, "&realpath"); |
if (warnings) |
continue; |
if (strcasecmp(msec, of->sec)) |
|
fprintf(stderr, "%s: " |
|
"section \"%s\" manual " |
|
"in \"%s\" directory\n", |
|
fn, msec, of->sec); |
|
|
|
/* |
|
* Manual page directories exist for each kernel |
|
* architecture as returned by machine(1). |
|
* However, many manuals only depend on the |
|
* application architecture as returned by arch(1). |
|
* For example, some (2/ARM) manuals are shared |
|
* across the "armish" and "zaurus" kernel |
|
* architectures. |
|
* A few manuals are even shared across completely |
|
* different architectures, for example fdformat(1) |
|
* on amd64, i386, sparc, and sparc64. |
|
* Thus, warn about architecture mismatches, |
|
* but don't skip manuals for this reason. |
|
*/ |
|
|
|
assert(of->arch); |
|
assert(march); |
|
if (warnings) |
|
if (strcasecmp(march, of->arch)) |
|
fprintf(stderr, "%s: " |
|
"architecture \"%s\" manual " |
|
"in \"%s\" directory\n", |
|
fn, march, of->arch); |
|
|
|
/* |
|
* By default, skip a file if the title given |
|
* in the file disagrees with the file name. |
|
* Do not warn, this happens for all MLINKs. |
|
*/ |
|
|
|
assert(of->title); |
|
assert(mtitle); |
|
if (strcasecmp(mtitle, of->title)) |
|
skip = 1; |
|
|
|
/* |
|
* Build a title string for the file. If it matches |
|
* the location of the file, remember the title as |
|
* found; else, remember it as missing. |
|
*/ |
|
|
|
if (warnings) { |
|
buf->len = 0; |
|
buf_appendb(buf, mtitle, strlen(mtitle)); |
|
buf_appendb(buf, "(", 1); |
|
buf_appendb(buf, msec, strlen(msec)); |
|
if ('\0' != *march) { |
|
buf_appendb(buf, "/", 1); |
|
buf_appendb(buf, march, strlen(march)); |
|
} |
} |
buf_appendb(buf, ")", 2); |
if (strncmp(buf, basedir, basedir_len) != 0 |
for (p = buf->cp; '\0' != *p; p++) |
#ifdef READ_ALLOWED_PATH |
*p = tolower((unsigned char)*p); |
&& !read_allowed(buf) |
key.data = buf->cp; |
#endif |
key.size = buf->len; |
) { |
val.data = NULL; |
if (warnings) say("", |
val.size = 0; |
"%s: outside base directory", buf); |
if (0 == skip) |
continue; |
val.data = emptystring; |
|
else { |
|
ch = (*files->get)(files, &key, &val, 0); |
|
if (ch < 0) { |
|
perror("hash"); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} else if (ch > 0) { |
|
val.data = (void *)fn; |
|
val.size = strlen(fn) + 1; |
|
} else |
|
val.data = NULL; |
|
} |
} |
if (NULL != val.data && |
/* Use logical inode to avoid mpages dupe. */ |
(*files->put)(files, &key, &val, 0) < 0) { |
if (stat(path, ff->fts_statp) == -1) { |
perror("hash"); |
if (warnings) |
exit((int)MANDOCLEVEL_SYSERR); |
say(path, "&stat"); |
|
continue; |
} |
} |
} |
if ((ff->fts_statp->st_mode & S_IFMT) != S_IFREG) |
|
continue; |
|
/* FALLTHROUGH */ |
|
|
if (skip && !use_all) |
|
continue; |
|
|
|
/* |
/* |
* The index record value consists of a nil-terminated |
* If we're a regular file, add an mlink by using the |
* filename, a nil-terminated manual section, and a |
* stored directory data and handling the filename. |
* nil-terminated description. Use the actual |
|
* location of the file, such that the user can find |
|
* it with man(1). Since the description may not be |
|
* set, we set a sentinel to see if we're going to |
|
* write a nil byte in its place. |
|
*/ |
*/ |
|
case FTS_F: |
|
if ( ! strcmp(path, MANDOC_DB)) |
|
continue; |
|
if ( ! use_all && ff->fts_level < 2) { |
|
if (warnings) |
|
say(path, "Extraneous file"); |
|
continue; |
|
} |
|
gzip = 0; |
|
fsec = NULL; |
|
while (fsec == NULL) { |
|
fsec = strrchr(ff->fts_name, '.'); |
|
if (fsec == NULL || strcmp(fsec+1, "gz")) |
|
break; |
|
gzip = 1; |
|
*fsec = '\0'; |
|
fsec = NULL; |
|
} |
|
if (fsec == NULL) { |
|
if ( ! use_all) { |
|
if (warnings) |
|
say(path, |
|
"No filename suffix"); |
|
continue; |
|
} |
|
} else if ( ! strcmp(++fsec, "html")) { |
|
if (warnings) |
|
say(path, "Skip html"); |
|
continue; |
|
} else if ( ! strcmp(fsec, "ps")) { |
|
if (warnings) |
|
say(path, "Skip ps"); |
|
continue; |
|
} else if ( ! strcmp(fsec, "pdf")) { |
|
if (warnings) |
|
say(path, "Skip pdf"); |
|
continue; |
|
} else if ( ! use_all && |
|
((dform == FORM_SRC && |
|
strncmp(fsec, dsec, strlen(dsec))) || |
|
(dform == FORM_CAT && strcmp(fsec, "0")))) { |
|
if (warnings) |
|
say(path, "Wrong filename suffix"); |
|
continue; |
|
} else |
|
fsec[-1] = '\0'; |
|
|
dbuf->len = 0; |
mlink = mandoc_calloc(1, sizeof(struct mlink)); |
type = mdoc ? 'd' : (man ? 'a' : 'c'); |
if (strlcpy(mlink->file, path, |
buf_appendb(dbuf, &type, 1); |
sizeof(mlink->file)) >= |
buf_appendb(dbuf, fn, strlen(fn) + 1); |
sizeof(mlink->file)) { |
buf_appendb(dbuf, of->sec, strlen(of->sec) + 1); |
say(path, "Filename too long"); |
buf_appendb(dbuf, of->title, strlen(of->title) + 1); |
free(mlink); |
buf_appendb(dbuf, of->arch, strlen(of->arch) + 1); |
continue; |
|
} |
sv = dbuf->len; |
mlink->dform = dform; |
|
mlink->dsec = dsec; |
/* |
mlink->arch = arch; |
* Collect keyword/mask pairs. |
mlink->name = ff->fts_name; |
* Each pair will become a new btree node. |
mlink->fsec = fsec; |
*/ |
mlink->gzip = gzip; |
|
mlink_add(mlink, ff->fts_statp); |
hash_reset(&hash); |
|
if (mdoc) |
|
pmdoc_node(hash, buf, dbuf, |
|
mdoc_node(mdoc), mdoc_meta(mdoc)); |
|
else if (man) |
|
pman_node(hash, buf, dbuf, man_node(man)); |
|
else |
|
pformatted(hash, buf, dbuf, of); |
|
|
|
/* Test mode, do not access any database. */ |
|
|
|
if (NULL == mdb->db || NULL == mdb->idx) |
|
continue; |
continue; |
|
|
/* |
case FTS_D: |
* Make sure the file name is always registered |
case FTS_DP: |
* as an .Nm search key. |
break; |
*/ |
|
buf->len = 0; |
|
buf_append(buf, of->title); |
|
hash_put(hash, buf, TYPE_Nm); |
|
|
|
/* |
default: |
* Reclaim an empty index record, if available. |
if (warnings) |
* Use its record number for all new btree nodes. |
say(path, "Not a regular file"); |
*/ |
continue; |
|
|
if (recs->cur > 0) { |
|
recs->cur--; |
|
rec = recs->stack[(int)recs->cur]; |
|
} else if (recs->last > 0) { |
|
rec = recs->last; |
|
recs->last = 0; |
|
} else |
|
rec++; |
|
vbuf[1] = htobe64(rec); |
|
|
|
/* |
|
* Copy from the in-memory hashtable of pending |
|
* keyword/mask pairs into the database. |
|
*/ |
|
|
|
seq = R_FIRST; |
|
while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) { |
|
seq = R_NEXT; |
|
assert(sizeof(uint64_t) == val.size); |
|
memcpy(&mask, val.data, val.size); |
|
vbuf[0] = htobe64(mask); |
|
val.size = sizeof(vbuf); |
|
val.data = &vbuf; |
|
dbt_put(mdb->db, mdb->dbn, &key, &val); |
|
} |
} |
if (ch < 0) { |
|
perror("hash"); |
|
unlink(mdb->dbn); |
|
unlink(mdb->idxn); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
} |
|
|
|
/* |
switch (ff->fts_level) { |
* Apply to the index. If we haven't had a description |
case 0: |
* set, put an empty one in now. |
/* Ignore the root directory. */ |
*/ |
break; |
|
case 1: |
|
/* |
|
* This might contain manX/ or catX/. |
|
* Try to infer this from the name. |
|
* If we're not in use_all, enforce it. |
|
*/ |
|
cp = ff->fts_name; |
|
if (ff->fts_info == FTS_DP) { |
|
dform = FORM_NONE; |
|
dsec = NULL; |
|
break; |
|
} |
|
|
if (dbuf->len == sv) |
if ( ! strncmp(cp, "man", 3)) { |
buf_appendb(dbuf, "", 1); |
dform = FORM_SRC; |
|
dsec = cp + 3; |
|
} else if ( ! strncmp(cp, "cat", 3)) { |
|
dform = FORM_CAT; |
|
dsec = cp + 3; |
|
} else { |
|
dform = FORM_NONE; |
|
dsec = NULL; |
|
} |
|
|
key.data = &rec; |
if (dsec != NULL || use_all) |
key.size = sizeof(recno_t); |
break; |
|
|
val.data = dbuf->cp; |
if (warnings) |
val.size = dbuf->len; |
say(path, "Unknown directory part"); |
|
fts_set(f, ff, FTS_SKIP); |
if (verb) |
break; |
printf("%s: adding to index\n", fn); |
case 2: |
|
/* |
dbt_put(mdb->idx, mdb->idxn, &key, &val); |
* Possibly our architecture. |
} |
* If we're descending, keep tabs on it. |
|
*/ |
/* |
if (ff->fts_info != FTS_DP && dsec != NULL) |
* Iterate the remembered file titles and check that |
arch = ff->fts_name; |
* all files can be found by their main title. |
else |
*/ |
arch = NULL; |
|
break; |
if (warnings) { |
default: |
seq = R_FIRST; |
if (ff->fts_info == FTS_DP || use_all) |
while (0 == (*files->seq)(files, &key, &val, seq)) { |
break; |
seq = R_NEXT; |
if (warnings) |
if (val.size) |
say(path, "Extraneous directory part"); |
fprintf(stderr, "%s: probably " |
fts_set(f, ff, FTS_SKIP); |
"unreachable, title is %s\n", |
break; |
(char *)val.data, (char *)key.data); |
|
} |
} |
(*files->close)(files); |
|
} |
} |
|
|
|
fts_close(f); |
|
return 1; |
} |
} |
|
|
/* |
/* |
* Scan through all entries in the index file `idx' and prune those |
* Add a file to the mlinks table. |
* entries in `ofile'. |
* Do not verify that it's a "valid" looking manpage (we'll do that |
* Pruning consists of removing from `db', then invalidating the entry |
* later). |
* in `idx' (zeroing its value size). |
* |
|
* Try to infer the manual section, architecture, and page name from the |
|
* path, assuming it looks like |
|
* |
|
* [./]man*[/<arch>]/<name>.<section> |
|
* or |
|
* [./]cat<section>[/<arch>]/<name>.0 |
|
* |
|
* See treescan() for the fts(3) version of this. |
*/ |
*/ |
static void |
static void |
index_prune(const struct of *ofile, struct mdb *mdb, struct recs *recs) |
filescan(const char *infile) |
{ |
{ |
const struct of *of; |
struct stat st; |
const char *fn; |
struct mlink *mlink; |
uint64_t vbuf[2]; |
char *linkfile, *p, *realdir, *start, *usefile; |
unsigned seq, sseq; |
size_t realdir_len; |
DBT key, val; |
|
int ch; |
|
|
|
recs->cur = 0; |
assert(use_all); |
seq = R_FIRST; |
|
while (0 == (ch = (*mdb->idx->seq)(mdb->idx, &key, &val, seq))) { |
|
seq = R_NEXT; |
|
assert(sizeof(recno_t) == key.size); |
|
memcpy(&recs->last, key.data, key.size); |
|
|
|
/* Deleted records are zero-sized. Skip them. */ |
if (strncmp(infile, "./", 2) == 0) |
|
infile += 2; |
|
|
if (0 == val.size) |
/* |
goto cont; |
* We have to do lstat(2) before realpath(3) loses |
|
* the information whether this is a symbolic link. |
|
* We need to know that because for symbolic links, |
|
* we want to use the orginal file name, while for |
|
* regular files, we want to use the real path. |
|
*/ |
|
if (lstat(infile, &st) == -1) { |
|
exitcode = (int)MANDOCLEVEL_BADARG; |
|
say(infile, "&lstat"); |
|
return; |
|
} else if (S_ISREG(st.st_mode) == 0 && S_ISLNK(st.st_mode) == 0) { |
|
exitcode = (int)MANDOCLEVEL_BADARG; |
|
say(infile, "Not a regular file"); |
|
return; |
|
} |
|
|
/* |
/* |
* Make sure we're sane. |
* We have to resolve the file name to the real path |
* Read past our mdoc/man/cat type to the next string, |
* in any case for the base directory check. |
* then make sure it's bounded by a NUL. |
*/ |
* Failing any of these, we go into our error handler. |
if ((usefile = realpath(infile, NULL)) == NULL) { |
*/ |
exitcode = (int)MANDOCLEVEL_BADARG; |
|
say(infile, "&realpath"); |
|
return; |
|
} |
|
|
fn = (char *)val.data + 1; |
if (op == OP_TEST) |
if (NULL == memchr(fn, '\0', val.size - 1)) |
start = usefile; |
|
else if (strncmp(usefile, basedir, basedir_len) == 0) |
|
start = usefile + basedir_len; |
|
#ifdef READ_ALLOWED_PATH |
|
else if (read_allowed(usefile)) |
|
start = usefile; |
|
#endif |
|
else { |
|
exitcode = (int)MANDOCLEVEL_BADARG; |
|
say("", "%s: outside base directory", infile); |
|
free(usefile); |
|
return; |
|
} |
|
|
|
/* |
|
* Now we are sure the file is inside our tree. |
|
* If it is a symbolic link, ignore the real path |
|
* and use the original name. |
|
*/ |
|
do { |
|
if (S_ISLNK(st.st_mode) == 0) |
break; |
break; |
|
|
/* |
/* |
* Search for the file in those we care about. |
* Some implementations of realpath(3) may succeed |
* XXX: build this into a tree. Too slow. |
* even if the target of the link does not exist, |
|
* so check again for extra safety. |
*/ |
*/ |
|
if (stat(usefile, &st) == -1) { |
|
exitcode = (int)MANDOCLEVEL_BADARG; |
|
say(infile, "&stat"); |
|
free(usefile); |
|
return; |
|
} |
|
linkfile = mandoc_strdup(infile); |
|
if (op == OP_TEST) { |
|
free(usefile); |
|
start = usefile = linkfile; |
|
break; |
|
} |
|
if (strncmp(infile, basedir, basedir_len) == 0) { |
|
free(usefile); |
|
usefile = linkfile; |
|
start = usefile + basedir_len; |
|
break; |
|
} |
|
|
for (of = ofile->first; of; of = of->next) |
|
if (0 == strcmp(fn, of->fname)) |
|
break; |
|
|
|
if (NULL == of) |
|
continue; |
|
|
|
/* |
/* |
* Search through the keyword database, throwing out all |
* This symbolic link points into the basedir |
* references to our file. |
* from the outside. Let's see whether any of |
|
* the parent directories resolve to the basedir. |
*/ |
*/ |
|
p = strchr(linkfile, '\0'); |
sseq = R_FIRST; |
do { |
while (0 == (ch = (*mdb->db->seq)(mdb->db, |
while (*--p != '/') |
&key, &val, sseq))) { |
|
sseq = R_NEXT; |
|
if (sizeof(vbuf) != val.size) |
|
break; |
|
|
|
memcpy(vbuf, val.data, val.size); |
|
if (recs->last != betoh64(vbuf[1])) |
|
continue; |
continue; |
|
*p = '\0'; |
|
if ((realdir = realpath(linkfile, NULL)) == NULL) { |
|
exitcode = (int)MANDOCLEVEL_BADARG; |
|
say(infile, "&realpath"); |
|
free(linkfile); |
|
free(usefile); |
|
return; |
|
} |
|
realdir_len = strlen(realdir) + 1; |
|
free(realdir); |
|
*p = '/'; |
|
} while (realdir_len > basedir_len); |
|
|
if ((ch = (*mdb->db->del)(mdb->db, |
/* |
&key, R_CURSOR)) < 0) |
* If one of the directories resolves to the basedir, |
break; |
* use the rest of the original name. |
|
* Otherwise, the best we can do |
|
* is to use the filename pointed to. |
|
*/ |
|
if (realdir_len == basedir_len) { |
|
free(usefile); |
|
usefile = linkfile; |
|
start = p + 1; |
|
} else { |
|
free(linkfile); |
|
start = usefile + basedir_len; |
} |
} |
|
} while (/* CONSTCOND */ 0); |
|
|
if (ch < 0) { |
mlink = mandoc_calloc(1, sizeof(struct mlink)); |
perror(mdb->dbn); |
mlink->dform = FORM_NONE; |
exit((int)MANDOCLEVEL_SYSERR); |
if (strlcpy(mlink->file, start, sizeof(mlink->file)) >= |
} else if (1 != ch) { |
sizeof(mlink->file)) { |
fprintf(stderr, "%s: corrupt database\n", |
say(start, "Filename too long"); |
mdb->dbn); |
free(mlink); |
exit((int)MANDOCLEVEL_SYSERR); |
free(usefile); |
} |
return; |
|
} |
|
|
if (verb) |
/* |
printf("%s: deleting from index\n", fn); |
* In test mode or when the original name is absolute |
|
* but outside our tree, guess the base directory. |
|
*/ |
|
|
val.size = 0; |
if (op == OP_TEST || (start == usefile && *start == '/')) { |
ch = (*mdb->idx->put)(mdb->idx, &key, &val, R_CURSOR); |
if (strncmp(usefile, "man/", 4) == 0) |
|
start = usefile + 4; |
|
else if ((start = strstr(usefile, "/man/")) != NULL) |
|
start += 5; |
|
else |
|
start = usefile; |
|
} |
|
|
if (ch < 0) |
/* |
break; |
* First try to guess our directory structure. |
cont: |
* If we find a separator, try to look for man* or cat*. |
if (recs->cur >= recs->size) { |
* If we find one of these and what's underneath is a directory, |
recs->size += MANDOC_SLOP; |
* assume it's an architecture. |
recs->stack = mandoc_realloc(recs->stack, |
*/ |
recs->size * sizeof(recno_t)); |
if ((p = strchr(start, '/')) != NULL) { |
|
*p++ = '\0'; |
|
if (strncmp(start, "man", 3) == 0) { |
|
mlink->dform = FORM_SRC; |
|
mlink->dsec = start + 3; |
|
} else if (strncmp(start, "cat", 3) == 0) { |
|
mlink->dform = FORM_CAT; |
|
mlink->dsec = start + 3; |
} |
} |
|
|
recs->stack[(int)recs->cur] = recs->last; |
start = p; |
recs->cur++; |
if (mlink->dsec != NULL && (p = strchr(start, '/')) != NULL) { |
|
*p++ = '\0'; |
|
mlink->arch = start; |
|
start = p; |
|
} |
} |
} |
|
|
if (ch < 0) { |
/* |
perror(mdb->idxn); |
* Now check the file suffix. |
exit((int)MANDOCLEVEL_SYSERR); |
* Suffix of `.0' indicates a catpage, `.1-9' is a manpage. |
} else if (1 != ch) { |
*/ |
fprintf(stderr, "%s: corrupt index\n", mdb->idxn); |
p = strrchr(start, '\0'); |
exit((int)MANDOCLEVEL_SYSERR); |
while (p-- > start && *p != '/' && *p != '.') |
|
continue; |
|
|
|
if (*p == '.') { |
|
*p++ = '\0'; |
|
mlink->fsec = p; |
} |
} |
|
|
recs->last++; |
/* |
|
* Now try to parse the name. |
|
* Use the filename portion of the path. |
|
*/ |
|
mlink->name = start; |
|
if ((p = strrchr(start, '/')) != NULL) { |
|
mlink->name = p + 1; |
|
*p = '\0'; |
|
} |
|
mlink_add(mlink, &st); |
|
free(usefile); |
} |
} |
|
|
/* |
|
* Grow the buffer (if necessary) and copy in a binary string. |
|
*/ |
|
static void |
static void |
buf_appendb(struct buf *buf, const void *cp, size_t sz) |
mlink_add(struct mlink *mlink, const struct stat *st) |
{ |
{ |
|
struct inodev inodev; |
|
struct mpage *mpage; |
|
unsigned int slot; |
|
|
/* Overshoot by MANDOC_BUFSZ. */ |
assert(NULL != mlink->file); |
|
|
while (buf->len + sz >= buf->size) { |
mlink->dsec = mandoc_strdup(mlink->dsec ? mlink->dsec : ""); |
buf->size = buf->len + sz + MANDOC_BUFSZ; |
mlink->arch = mandoc_strdup(mlink->arch ? mlink->arch : ""); |
buf->cp = mandoc_realloc(buf->cp, buf->size); |
mlink->name = mandoc_strdup(mlink->name ? mlink->name : ""); |
} |
mlink->fsec = mandoc_strdup(mlink->fsec ? mlink->fsec : ""); |
|
|
memcpy(buf->cp + (int)buf->len, cp, sz); |
if ('0' == *mlink->fsec) { |
buf->len += sz; |
free(mlink->fsec); |
|
mlink->fsec = mandoc_strdup(mlink->dsec); |
|
mlink->fform = FORM_CAT; |
|
} else if ('1' <= *mlink->fsec && '9' >= *mlink->fsec) |
|
mlink->fform = FORM_SRC; |
|
else |
|
mlink->fform = FORM_NONE; |
|
|
|
slot = ohash_qlookup(&mlinks, mlink->file); |
|
assert(NULL == ohash_find(&mlinks, slot)); |
|
ohash_insert(&mlinks, slot, mlink); |
|
|
|
memset(&inodev, 0, sizeof(inodev)); /* Clear padding. */ |
|
inodev.st_ino = st->st_ino; |
|
inodev.st_dev = st->st_dev; |
|
slot = ohash_lookup_memory(&mpages, (char *)&inodev, |
|
sizeof(struct inodev), inodev.st_ino); |
|
mpage = ohash_find(&mpages, slot); |
|
if (NULL == mpage) { |
|
mpage = mandoc_calloc(1, sizeof(struct mpage)); |
|
mpage->inodev.st_ino = inodev.st_ino; |
|
mpage->inodev.st_dev = inodev.st_dev; |
|
mpage->form = FORM_NONE; |
|
mpage->next = mpage_head; |
|
mpage_head = mpage; |
|
ohash_insert(&mpages, slot, mpage); |
|
} else |
|
mlink->next = mpage->mlinks; |
|
mpage->mlinks = mlink; |
|
mlink->mpage = mpage; |
} |
} |
|
|
/* |
|
* Append a nil-terminated string to the buffer. |
|
* This can be invoked multiple times. |
|
* The buffer string will be nil-terminated. |
|
* If invoked multiple times, a space is put between strings. |
|
*/ |
|
static void |
static void |
buf_append(struct buf *buf, const char *cp) |
mlink_free(struct mlink *mlink) |
{ |
{ |
size_t sz; |
|
|
|
if (0 == (sz = strlen(cp))) |
free(mlink->dsec); |
return; |
free(mlink->arch); |
|
free(mlink->name); |
if (buf->len) |
free(mlink->fsec); |
buf->cp[(int)buf->len - 1] = ' '; |
free(mlink); |
|
|
buf_appendb(buf, cp, sz + 1); |
|
} |
} |
|
|
/* |
|
* Recursively add all text from a given node. |
|
* This is optimised for general mdoc nodes in this context, which do |
|
* not consist of subexpressions and having a recursive call for n->next |
|
* would be wasteful. |
|
* The "f" variable should be 0 unless called from pmdoc_Nd for the |
|
* description buffer, which does not start at the beginning of the |
|
* buffer. |
|
*/ |
|
static void |
static void |
buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f) |
mpages_free(void) |
{ |
{ |
|
struct mpage *mpage; |
|
struct mlink *mlink; |
|
|
for ( ; n; n = n->next) { |
while ((mpage = mpage_head) != NULL) { |
if (n->child) |
while ((mlink = mpage->mlinks) != NULL) { |
buf_appendmdoc(buf, n->child, f); |
mpage->mlinks = mlink->next; |
|
mlink_free(mlink); |
if (MDOC_TEXT == n->type && f) { |
} |
f = 0; |
mpage_head = mpage->next; |
buf_appendb(buf, n->string, |
free(mpage->sec); |
strlen(n->string) + 1); |
free(mpage->arch); |
} else if (MDOC_TEXT == n->type) |
free(mpage->title); |
buf_append(buf, n->string); |
free(mpage->desc); |
|
free(mpage); |
} |
} |
} |
} |
|
|
|
/* |
|
* For each mlink to the mpage, check whether the path looks like |
|
* it is formatted, and if it does, check whether a source manual |
|
* exists by the same name, ignoring the suffix. |
|
* If both conditions hold, drop the mlink. |
|
*/ |
static void |
static void |
hash_reset(DB **db) |
mlinks_undupe(struct mpage *mpage) |
{ |
{ |
DB *hash; |
char buf[PATH_MAX]; |
|
struct mlink **prev; |
|
struct mlink *mlink; |
|
char *bufp; |
|
|
if (NULL != (hash = *db)) |
mpage->form = FORM_CAT; |
(*hash->close)(hash); |
prev = &mpage->mlinks; |
|
while (NULL != (mlink = *prev)) { |
*db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL); |
if (FORM_CAT != mlink->dform) { |
if (NULL == *db) { |
mpage->form = FORM_NONE; |
perror("hash"); |
goto nextlink; |
exit((int)MANDOCLEVEL_SYSERR); |
} |
|
(void)strlcpy(buf, mlink->file, sizeof(buf)); |
|
bufp = strstr(buf, "cat"); |
|
assert(NULL != bufp); |
|
memcpy(bufp, "man", 3); |
|
if (NULL != (bufp = strrchr(buf, '.'))) |
|
*++bufp = '\0'; |
|
(void)strlcat(buf, mlink->dsec, sizeof(buf)); |
|
if (NULL == ohash_find(&mlinks, |
|
ohash_qlookup(&mlinks, buf))) |
|
goto nextlink; |
|
if (warnings) |
|
say(mlink->file, "Man source exists: %s", buf); |
|
if (use_all) |
|
goto nextlink; |
|
*prev = mlink->next; |
|
mlink_free(mlink); |
|
continue; |
|
nextlink: |
|
prev = &(*prev)->next; |
} |
} |
} |
} |
|
|
/* ARGSUSED */ |
static void |
static int |
mlink_check(struct mpage *mpage, struct mlink *mlink) |
pmdoc_head(MDOC_ARGS) |
|
{ |
{ |
|
struct str *str; |
|
unsigned int slot; |
|
|
return(MDOC_HEAD == n->type); |
/* |
} |
* Check whether the manual section given in a file |
|
* agrees with the directory where the file is located. |
|
* Some manuals have suffixes like (3p) on their |
|
* section number either inside the file or in the |
|
* directory name, some are linked into more than one |
|
* section, like encrypt(1) = makekey(8). |
|
*/ |
|
|
/* ARGSUSED */ |
if (FORM_SRC == mpage->form && |
static int |
strcasecmp(mpage->sec, mlink->dsec)) |
pmdoc_body(MDOC_ARGS) |
say(mlink->file, "Section \"%s\" manual in %s directory", |
{ |
mpage->sec, mlink->dsec); |
|
|
return(MDOC_BODY == n->type); |
/* |
} |
* Manual page directories exist for each kernel |
|
* architecture as returned by machine(1). |
|
* However, many manuals only depend on the |
|
* application architecture as returned by arch(1). |
|
* For example, some (2/ARM) manuals are shared |
|
* across the "armish" and "zaurus" kernel |
|
* architectures. |
|
* A few manuals are even shared across completely |
|
* different architectures, for example fdformat(1) |
|
* on amd64, i386, and sparc64. |
|
*/ |
|
|
/* ARGSUSED */ |
if (strcasecmp(mpage->arch, mlink->arch)) |
static int |
say(mlink->file, "Architecture \"%s\" manual in " |
pmdoc_Fd(MDOC_ARGS) |
"\"%s\" directory", mpage->arch, mlink->arch); |
{ |
|
const char *start, *end; |
|
size_t sz; |
|
|
|
if (SEC_SYNOPSIS != n->sec) |
|
return(0); |
|
if (NULL == (n = n->child) || MDOC_TEXT != n->type) |
|
return(0); |
|
|
|
/* |
/* |
* Only consider those `Fd' macro fields that begin with an |
* XXX |
* "inclusion" token (versus, e.g., #define). |
* parse_cat() doesn't set NAME_TITLE yet. |
*/ |
*/ |
if (strcmp("#include", n->string)) |
|
return(0); |
|
|
|
if (NULL == (n = n->next) || MDOC_TEXT != n->type) |
if (FORM_CAT == mpage->form) |
return(0); |
return; |
|
|
/* |
/* |
* Strip away the enclosing angle brackets and make sure we're |
* Check whether this mlink |
* not zero-length. |
* appears as a name in the NAME section. |
*/ |
*/ |
|
|
start = n->string; |
slot = ohash_qlookup(&names, mlink->name); |
if ('<' == *start || '"' == *start) |
str = ohash_find(&names, slot); |
start++; |
assert(NULL != str); |
|
if ( ! (NAME_TITLE & str->mask)) |
if (0 == (sz = strlen(start))) |
say(mlink->file, "Name missing in NAME section"); |
return(0); |
|
|
|
end = &start[(int)sz - 1]; |
|
if ('>' == *end || '"' == *end) |
|
end--; |
|
|
|
assert(end >= start); |
|
|
|
buf_appendb(buf, start, (size_t)(end - start + 1)); |
|
buf_appendb(buf, "", 1); |
|
return(1); |
|
} |
} |
|
|
/* ARGSUSED */ |
/* |
static int |
* Run through the files in the global vector "mpages" |
pmdoc_In(MDOC_ARGS) |
* and add them to the database specified in "basedir". |
|
* |
|
* This handles the parsing scheme itself, using the cues of directory |
|
* and filename to determine whether the file is parsable or not. |
|
*/ |
|
static void |
|
mpages_merge(struct dba *dba, struct mparse *mp) |
{ |
{ |
|
struct mpage *mpage, *mpage_dest; |
|
struct mlink *mlink, *mlink_dest; |
|
struct roff_meta *meta; |
|
char *cp; |
|
int fd; |
|
|
if (NULL == n->child || MDOC_TEXT != n->child->type) |
for (mpage = mpage_head; mpage != NULL; mpage = mpage->next) { |
return(0); |
mlinks_undupe(mpage); |
|
if ((mlink = mpage->mlinks) == NULL) |
|
continue; |
|
|
buf_append(buf, n->child->string); |
name_mask = NAME_MASK; |
return(1); |
mandoc_ohash_init(&names, 4, offsetof(struct str, key)); |
} |
mandoc_ohash_init(&strings, 6, offsetof(struct str, key)); |
|
mparse_reset(mp); |
|
meta = NULL; |
|
|
/* ARGSUSED */ |
if ((fd = mparse_open(mp, mlink->file)) == -1) { |
static int |
say(mlink->file, "&open"); |
pmdoc_Fn(MDOC_ARGS) |
goto nextpage; |
{ |
} |
struct mdoc_node *nn; |
|
const char *cp; |
|
|
|
nn = n->child; |
/* |
|
* Interpret the file as mdoc(7) or man(7) source |
|
* code, unless it is known to be formatted. |
|
*/ |
|
if (mlink->dform != FORM_CAT || mlink->fform != FORM_CAT) { |
|
mparse_readfd(mp, fd, mlink->file); |
|
close(fd); |
|
fd = -1; |
|
meta = mparse_result(mp); |
|
} |
|
|
if (NULL == nn || MDOC_TEXT != nn->type) |
if (meta != NULL && meta->sodest != NULL) { |
return(0); |
mlink_dest = ohash_find(&mlinks, |
|
ohash_qlookup(&mlinks, meta->sodest)); |
|
if (mlink_dest == NULL) { |
|
mandoc_asprintf(&cp, "%s.gz", meta->sodest); |
|
mlink_dest = ohash_find(&mlinks, |
|
ohash_qlookup(&mlinks, cp)); |
|
free(cp); |
|
} |
|
if (mlink_dest != NULL) { |
|
|
/* .Fn "struct type *name" "char *arg" */ |
/* The .so target exists. */ |
|
|
cp = strrchr(nn->string, ' '); |
mpage_dest = mlink_dest->mpage; |
if (NULL == cp) |
while (1) { |
cp = nn->string; |
mlink->mpage = mpage_dest; |
|
|
/* Strip away pointer symbol. */ |
/* |
|
* If the target was already |
|
* processed, add the links |
|
* to the database now. |
|
* Otherwise, this will |
|
* happen when we come |
|
* to the target. |
|
*/ |
|
|
while ('*' == *cp) |
if (mpage_dest->dba != NULL) |
cp++; |
dbadd_mlink(mlink); |
|
|
/* Store the function name. */ |
if (mlink->next == NULL) |
|
break; |
|
mlink = mlink->next; |
|
} |
|
|
buf_append(buf, cp); |
/* Move all links to the target. */ |
hash_put(hash, buf, TYPE_Fn); |
|
|
|
/* Store the function type. */ |
mlink->next = mlink_dest->next; |
|
mlink_dest->next = mpage->mlinks; |
|
mpage->mlinks = NULL; |
|
goto nextpage; |
|
} |
|
meta->macroset = MACROSET_NONE; |
|
} |
|
if (meta != NULL && meta->macroset == MACROSET_MDOC) { |
|
mpage->form = FORM_SRC; |
|
mpage->sec = meta->msec; |
|
mpage->sec = mandoc_strdup( |
|
mpage->sec == NULL ? "" : mpage->sec); |
|
mpage->arch = meta->arch; |
|
mpage->arch = mandoc_strdup( |
|
mpage->arch == NULL ? "" : mpage->arch); |
|
mpage->title = mandoc_strdup(meta->title); |
|
} else if (meta != NULL && meta->macroset == MACROSET_MAN) { |
|
if (*meta->msec != '\0' || *meta->title != '\0') { |
|
mpage->form = FORM_SRC; |
|
mpage->sec = mandoc_strdup(meta->msec); |
|
mpage->arch = mandoc_strdup(mlink->arch); |
|
mpage->title = mandoc_strdup(meta->title); |
|
} else |
|
meta = NULL; |
|
} |
|
|
if (nn->string < cp) { |
assert(mpage->desc == NULL); |
buf->len = 0; |
if (meta == NULL || meta->sodest != NULL) { |
buf_appendb(buf, nn->string, cp - nn->string); |
mpage->sec = mandoc_strdup(mlink->dsec); |
buf_appendb(buf, "", 1); |
mpage->arch = mandoc_strdup(mlink->arch); |
hash_put(hash, buf, TYPE_Ft); |
mpage->title = mandoc_strdup(mlink->name); |
} |
if (meta == NULL) { |
|
mpage->form = FORM_CAT; |
|
parse_cat(mpage, fd); |
|
} else |
|
mpage->form = FORM_SRC; |
|
} else if (meta->macroset == MACROSET_MDOC) |
|
parse_mdoc(mpage, meta, meta->first); |
|
else |
|
parse_man(mpage, meta, meta->first); |
|
if (mpage->desc == NULL) { |
|
mpage->desc = mandoc_strdup(mlink->name); |
|
if (warnings) |
|
say(mlink->file, "No one-line description, " |
|
"using filename \"%s\"", mlink->name); |
|
} |
|
|
/* Store the arguments. */ |
for (mlink = mpage->mlinks; |
|
mlink != NULL; |
|
mlink = mlink->next) { |
|
putkey(mpage, mlink->name, NAME_FILE); |
|
if (warnings && !use_all) |
|
mlink_check(mpage, mlink); |
|
} |
|
|
for (nn = nn->next; nn; nn = nn->next) { |
dbadd(dba, mpage); |
if (MDOC_TEXT != nn->type) |
|
continue; |
|
buf->len = 0; |
|
buf_append(buf, nn->string); |
|
hash_put(hash, buf, TYPE_Fa); |
|
} |
|
|
|
return(0); |
nextpage: |
|
ohash_delete(&strings); |
|
ohash_delete(&names); |
|
} |
} |
} |
|
|
/* ARGSUSED */ |
static void |
static int |
parse_cat(struct mpage *mpage, int fd) |
pmdoc_St(MDOC_ARGS) |
|
{ |
{ |
|
FILE *stream; |
|
struct mlink *mlink; |
|
char *line, *p, *title, *sec; |
|
size_t linesz, plen, titlesz; |
|
ssize_t len; |
|
int offs; |
|
|
if (NULL == n->child || MDOC_TEXT != n->child->type) |
mlink = mpage->mlinks; |
return(0); |
stream = fd == -1 ? fopen(mlink->file, "r") : fdopen(fd, "r"); |
|
if (stream == NULL) { |
|
if (fd != -1) |
|
close(fd); |
|
if (warnings) |
|
say(mlink->file, "&fopen"); |
|
return; |
|
} |
|
|
buf_append(buf, n->child->string); |
line = NULL; |
return(1); |
linesz = 0; |
} |
|
|
|
/* ARGSUSED */ |
/* Parse the section number from the header line. */ |
static int |
|
pmdoc_Xr(MDOC_ARGS) |
|
{ |
|
|
|
if (NULL == (n = n->child)) |
while (getline(&line, &linesz, stream) != -1) { |
return(0); |
if (*line == '\n') |
|
continue; |
|
if ((sec = strchr(line, '(')) == NULL) |
|
break; |
|
if ((p = strchr(++sec, ')')) == NULL) |
|
break; |
|
free(mpage->sec); |
|
mpage->sec = mandoc_strndup(sec, p - sec); |
|
if (warnings && *mlink->dsec != '\0' && |
|
strcasecmp(mpage->sec, mlink->dsec)) |
|
say(mlink->file, |
|
"Section \"%s\" manual in %s directory", |
|
mpage->sec, mlink->dsec); |
|
break; |
|
} |
|
|
buf_appendb(buf, n->string, strlen(n->string)); |
/* Skip to first blank line. */ |
|
|
if (NULL != (n = n->next)) { |
while (line == NULL || *line != '\n') |
buf_appendb(buf, ".", 1); |
if (getline(&line, &linesz, stream) == -1) |
buf_appendb(buf, n->string, strlen(n->string) + 1); |
break; |
} else |
|
buf_appendb(buf, ".", 2); |
|
|
|
return(1); |
/* |
} |
* Assume the first line that is not indented |
|
* is the first section header. Skip to it. |
|
*/ |
|
|
/* ARGSUSED */ |
while (getline(&line, &linesz, stream) != -1) |
static int |
if (*line != '\n' && *line != ' ') |
pmdoc_Nd(MDOC_ARGS) |
break; |
{ |
|
|
|
if (MDOC_BODY != n->type) |
/* |
return(0); |
* Read up until the next section into a buffer. |
|
* Strip the leading and trailing newline from each read line, |
|
* appending a trailing space. |
|
* Ignore empty (whitespace-only) lines. |
|
*/ |
|
|
buf_appendmdoc(dbuf, n->child, 1); |
titlesz = 0; |
return(1); |
title = NULL; |
} |
|
|
|
/* ARGSUSED */ |
while ((len = getline(&line, &linesz, stream)) != -1) { |
static int |
if (*line != ' ') |
pmdoc_Nm(MDOC_ARGS) |
break; |
{ |
offs = 0; |
|
while (isspace((unsigned char)line[offs])) |
|
offs++; |
|
if (line[offs] == '\0') |
|
continue; |
|
title = mandoc_realloc(title, titlesz + len - offs); |
|
memcpy(title + titlesz, line + offs, len - offs); |
|
titlesz += len - offs; |
|
title[titlesz - 1] = ' '; |
|
} |
|
free(line); |
|
|
if (SEC_NAME == n->sec) |
/* |
return(1); |
* If no page content can be found, or the input line |
else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) |
* is already the next section header, or there is no |
return(0); |
* trailing newline, reuse the page title as the page |
|
* description. |
|
*/ |
|
|
if (NULL == n->child) |
if (NULL == title || '\0' == *title) { |
buf_append(buf, m->name); |
if (warnings) |
|
say(mlink->file, "Cannot find NAME section"); |
|
fclose(stream); |
|
free(title); |
|
return; |
|
} |
|
|
return(1); |
title[titlesz - 1] = '\0'; |
} |
|
|
|
/* ARGSUSED */ |
/* |
static int |
* Skip to the first dash. |
pmdoc_Sh(MDOC_ARGS) |
* Use the remaining line as the description (no more than 70 |
{ |
* bytes). |
|
*/ |
|
|
return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type); |
if (NULL != (p = strstr(title, "- "))) { |
} |
for (p += 2; ' ' == *p || '\b' == *p; p++) |
|
/* Skip to next word. */ ; |
|
} else { |
|
if (warnings) |
|
say(mlink->file, "No dash in title line, " |
|
"reusing \"%s\" as one-line description", title); |
|
p = title; |
|
} |
|
|
static void |
plen = strlen(p); |
hash_put(DB *db, const struct buf *buf, uint64_t mask) |
|
{ |
|
uint64_t oldmask; |
|
DBT key, val; |
|
int rc; |
|
|
|
if (buf->len < 2) |
/* Strip backspace-encoding from line. */ |
return; |
|
|
|
key.data = buf->cp; |
while (NULL != (line = memchr(p, '\b', plen))) { |
key.size = buf->len; |
len = line - p; |
|
if (0 == len) { |
if ((rc = (*db->get)(db, &key, &val, 0)) < 0) { |
memmove(line, line + 1, plen--); |
perror("hash"); |
continue; |
exit((int)MANDOCLEVEL_SYSERR); |
} |
} else if (0 == rc) { |
memmove(line - 1, line + 1, plen - len); |
assert(sizeof(uint64_t) == val.size); |
plen -= 2; |
memcpy(&oldmask, val.data, val.size); |
|
mask |= oldmask; |
|
} |
} |
|
|
val.data = &mask; |
/* |
val.size = sizeof(uint64_t); |
* Cut off excessive one-line descriptions. |
|
* Bad pages are not worth better heuristics. |
|
*/ |
|
|
if ((rc = (*db->put)(db, &key, &val, 0)) < 0) { |
mpage->desc = mandoc_strndup(p, 150); |
perror("hash"); |
fclose(stream); |
exit((int)MANDOCLEVEL_SYSERR); |
free(title); |
} |
|
} |
} |
|
|
|
/* |
|
* Put a type/word pair into the word database for this particular file. |
|
*/ |
static void |
static void |
dbt_put(DB *db, const char *dbn, DBT *key, DBT *val) |
putkey(const struct mpage *mpage, char *value, uint64_t type) |
{ |
{ |
|
putkeys(mpage, value, strlen(value), type); |
assert(key->size); |
|
assert(val->size); |
|
|
|
if (0 == (*db->put)(db, key, val, 0)) |
|
return; |
|
|
|
perror(dbn); |
|
exit((int)MANDOCLEVEL_SYSERR); |
|
/* NOTREACHED */ |
|
} |
} |
|
|
/* |
/* |
* Call out to per-macro handlers after clearing the persistent database |
* Grok all nodes at or below a certain mdoc node into putkey(). |
* key. If the macro sets the database key, flush it to the database. |
|
*/ |
*/ |
static void |
static void |
pmdoc_node(MDOC_ARGS) |
putmdockey(const struct mpage *mpage, |
|
const struct roff_node *n, uint64_t m, int taboo) |
{ |
{ |
|
|
if (NULL == n) |
for ( ; NULL != n; n = n->next) { |
return; |
if (n->flags & taboo) |
|
continue; |
switch (n->type) { |
if (NULL != n->child) |
case (MDOC_HEAD): |
putmdockey(mpage, n->child, m, taboo); |
/* FALLTHROUGH */ |
if (n->type == ROFFT_TEXT) |
case (MDOC_BODY): |
putkey(mpage, n->string, m); |
/* FALLTHROUGH */ |
|
case (MDOC_TAIL): |
|
/* FALLTHROUGH */ |
|
case (MDOC_BLOCK): |
|
/* FALLTHROUGH */ |
|
case (MDOC_ELEM): |
|
buf->len = 0; |
|
|
|
/* |
|
* Both NULL handlers and handlers returning true |
|
* request using the data. Only skip the element |
|
* when the handler returns false. |
|
*/ |
|
|
|
if (NULL != mdocs[n->tok].fp && |
|
0 == (*mdocs[n->tok].fp)(hash, buf, dbuf, n, m)) |
|
break; |
|
|
|
/* |
|
* For many macros, use the text from all children. |
|
* Set zero flags for macros not needing this. |
|
* In that case, the handler must fill the buffer. |
|
*/ |
|
|
|
if (MDOCF_CHILD & mdocs[n->tok].flags) |
|
buf_appendmdoc(buf, n->child, 0); |
|
|
|
/* |
|
* Cover the most common case: |
|
* Automatically stage one string per element. |
|
* Set a zero mask for macros not needing this. |
|
* Additional staging can be done in the handler. |
|
*/ |
|
|
|
if (mdocs[n->tok].mask) |
|
hash_put(hash, buf, mdocs[n->tok].mask); |
|
break; |
|
default: |
|
break; |
|
} |
} |
|
|
pmdoc_node(hash, buf, dbuf, n->child, m); |
|
pmdoc_node(hash, buf, dbuf, n->next, m); |
|
} |
} |
|
|
static int |
static void |
pman_node(MAN_ARGS) |
parse_man(struct mpage *mpage, const struct roff_meta *meta, |
|
const struct roff_node *n) |
{ |
{ |
const struct man_node *head, *body; |
const struct roff_node *head, *body; |
char *start, *sv, *title; |
char *start, *title; |
size_t sz, titlesz; |
char byte; |
|
size_t sz; |
|
|
if (NULL == n) |
if (n == NULL) |
return(0); |
return; |
|
|
/* |
/* |
* We're only searching for one thing: the first text child in |
* We're only searching for one thing: the first text child in |
Line 1395 pman_node(MAN_ARGS) |
|
Line 1492 pman_node(MAN_ARGS) |
|
* the correct section or not. |
* the correct section or not. |
*/ |
*/ |
|
|
if (MAN_BODY == n->type && MAN_SH == n->tok) { |
if (n->type == ROFFT_BODY && n->tok == MAN_SH) { |
body = n; |
body = n; |
assert(body->parent); |
if ((head = body->parent->head) != NULL && |
if (NULL != (head = body->parent->head) && |
(head = head->child) != NULL && |
1 == head->nchild && |
head->next == NULL && |
NULL != (head = (head->child)) && |
head->type == ROFFT_TEXT && |
MAN_TEXT == head->type && |
strcmp(head->string, "NAME") == 0 && |
0 == strcmp(head->string, "NAME") && |
body->child != NULL) { |
NULL != (body = body->child) && |
|
MAN_TEXT == body->type) { |
|
|
|
title = NULL; |
|
titlesz = 0; |
|
/* |
/* |
* Suck the entire NAME section into memory. |
* Suck the entire NAME section into memory. |
* Yes, we might run away. |
* Yes, we might run away. |
* But too many manuals have big, spread-out |
* But too many manuals have big, spread-out |
* NAME sections over many lines. |
* NAME sections over many lines. |
*/ |
*/ |
for ( ; NULL != body; body = body->next) { |
|
if (MAN_TEXT != body->type) |
title = NULL; |
break; |
deroff(&title, body); |
if (0 == (sz = strlen(body->string))) |
|
continue; |
|
title = mandoc_realloc |
|
(title, titlesz + sz + 1); |
|
memcpy(title + titlesz, body->string, sz); |
|
titlesz += sz + 1; |
|
title[(int)titlesz - 1] = ' '; |
|
} |
|
if (NULL == title) |
if (NULL == title) |
return(0); |
return; |
|
|
title = mandoc_realloc(title, titlesz + 1); |
/* |
title[(int)titlesz] = '\0'; |
|
|
|
/* Skip leading space. */ |
|
|
|
sv = title; |
|
while (isspace((unsigned char)*sv)) |
|
sv++; |
|
|
|
if (0 == (sz = strlen(sv))) { |
|
free(title); |
|
return(0); |
|
} |
|
|
|
/* Erase trailing space. */ |
|
|
|
start = &sv[sz - 1]; |
|
while (start > sv && isspace((unsigned char)*start)) |
|
*start-- = '\0'; |
|
|
|
if (start == sv) { |
|
free(title); |
|
return(0); |
|
} |
|
|
|
start = sv; |
|
|
|
/* |
|
* Go through a special heuristic dance here. |
* Go through a special heuristic dance here. |
* This is why -man manuals are great! |
|
* (I'm being sarcastic: my eyes are bleeding.) |
|
* Conventionally, one or more manual names are |
* Conventionally, one or more manual names are |
* comma-specified prior to a whitespace, then a |
* comma-specified prior to a whitespace, then a |
* dash, then a description. Try to puzzle out |
* dash, then a description. Try to puzzle out |
* the name parts here. |
* the name parts here. |
*/ |
*/ |
|
|
|
start = title; |
for ( ;; ) { |
for ( ;; ) { |
sz = strcspn(start, " ,"); |
sz = strcspn(start, " ,"); |
if ('\0' == start[(int)sz]) |
if ('\0' == start[sz]) |
break; |
break; |
|
|
buf->len = 0; |
byte = start[sz]; |
buf_appendb(buf, start, sz); |
start[sz] = '\0'; |
buf_appendb(buf, "", 1); |
|
|
|
hash_put(hash, buf, TYPE_Nm); |
/* |
|
* Assume a stray trailing comma in the |
|
* name list if a name begins with a dash. |
|
*/ |
|
|
if (' ' == start[(int)sz]) { |
if ('-' == start[0] || |
start += (int)sz + 1; |
('\\' == start[0] && '-' == start[1])) |
break; |
break; |
|
|
|
putkey(mpage, start, NAME_TITLE); |
|
if ( ! (mpage->name_head_done || |
|
strcasecmp(start, meta->title))) { |
|
putkey(mpage, start, NAME_HEAD); |
|
mpage->name_head_done = 1; |
} |
} |
|
|
assert(',' == start[(int)sz]); |
if (' ' == byte) { |
start += (int)sz + 1; |
start += sz + 1; |
|
break; |
|
} |
|
|
|
assert(',' == byte); |
|
start += sz + 1; |
while (' ' == *start) |
while (' ' == *start) |
start++; |
start++; |
} |
} |
|
|
buf->len = 0; |
if (start == title) { |
|
putkey(mpage, start, NAME_TITLE); |
if (sv == start) { |
if ( ! (mpage->name_head_done || |
buf_append(buf, start); |
strcasecmp(start, meta->title))) { |
|
putkey(mpage, start, NAME_HEAD); |
|
mpage->name_head_done = 1; |
|
} |
free(title); |
free(title); |
return(1); |
return; |
} |
} |
|
|
while (isspace((unsigned char)*start)) |
while (isspace((unsigned char)*start)) |
Line 1512 pman_node(MAN_ARGS) |
|
Line 1585 pman_node(MAN_ARGS) |
|
while (' ' == *start) |
while (' ' == *start) |
start++; |
start++; |
|
|
sz = strlen(start) + 1; |
/* |
buf_appendb(dbuf, start, sz); |
* Cut off excessive one-line descriptions. |
buf_appendb(buf, start, sz); |
* Bad pages are not worth better heuristics. |
|
*/ |
|
|
hash_put(hash, buf, TYPE_Nd); |
mpage->desc = mandoc_strndup(start, 150); |
free(title); |
free(title); |
|
return; |
} |
} |
} |
} |
|
|
for (n = n->child; n; n = n->next) |
for (n = n->child; n; n = n->next) { |
if (pman_node(hash, buf, dbuf, n)) |
if (NULL != mpage->desc) |
return(1); |
break; |
|
parse_man(mpage, meta, n); |
return(0); |
} |
} |
} |
|
|
/* |
|
* Parse a formatted manual page. |
|
* By necessity, this involves rather crude guesswork. |
|
*/ |
|
static void |
static void |
pformatted(DB *hash, struct buf *buf, |
parse_mdoc(struct mpage *mpage, const struct roff_meta *meta, |
struct buf *dbuf, const struct of *of) |
const struct roff_node *n) |
{ |
{ |
FILE *stream; |
const struct mdoc_handler *handler; |
char *line, *p, *title; |
|
size_t len, plen, titlesz; |
|
|
|
if (NULL == (stream = fopen(of->fname, "r"))) { |
for (n = n->child; n != NULL; n = n->next) { |
if (warnings) |
if (n->tok == TOKEN_NONE || n->tok < ROFF_MAX) |
perror(of->fname); |
continue; |
return; |
assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX); |
|
handler = mdoc_handlers + (n->tok - MDOC_Dd); |
|
if (n->flags & handler->taboo) |
|
continue; |
|
|
|
switch (n->type) { |
|
case ROFFT_ELEM: |
|
case ROFFT_BLOCK: |
|
case ROFFT_HEAD: |
|
case ROFFT_BODY: |
|
case ROFFT_TAIL: |
|
if (handler->fp != NULL && |
|
(*handler->fp)(mpage, meta, n) == 0) |
|
break; |
|
if (handler->mask) |
|
putmdockey(mpage, n->child, |
|
handler->mask, handler->taboo); |
|
break; |
|
default: |
|
continue; |
|
} |
|
if (NULL != n->child) |
|
parse_mdoc(mpage, meta, n); |
} |
} |
|
} |
|
|
/* |
static int |
* Always use the title derived from the filename up front, |
parse_mdoc_Fa(struct mpage *mpage, const struct roff_meta *meta, |
* do not even try to find it in the file. This also makes |
const struct roff_node *n) |
* sure we don't end up with an orphan index record, even if |
{ |
* the file content turns out to be completely unintelligible. |
uint64_t mask; |
*/ |
|
|
|
buf->len = 0; |
mask = TYPE_Fa; |
buf_append(buf, of->title); |
if (n->sec == SEC_SYNOPSIS) |
hash_put(hash, buf, TYPE_Nm); |
mask |= TYPE_Vt; |
|
|
/* Skip to first blank line. */ |
putmdockey(mpage, n->child, mask, 0); |
|
return 0; |
|
} |
|
|
while (NULL != (line = fgetln(stream, &len))) |
static int |
if ('\n' == *line) |
parse_mdoc_Fd(struct mpage *mpage, const struct roff_meta *meta, |
break; |
const struct roff_node *n) |
|
{ |
|
char *start, *end; |
|
size_t sz; |
|
|
|
if (SEC_SYNOPSIS != n->sec || |
|
NULL == (n = n->child) || |
|
n->type != ROFFT_TEXT) |
|
return 0; |
|
|
/* |
/* |
* Assume the first line that is not indented |
* Only consider those `Fd' macro fields that begin with an |
* is the first section header. Skip to it. |
* "inclusion" token (versus, e.g., #define). |
*/ |
*/ |
|
|
while (NULL != (line = fgetln(stream, &len))) |
if (strcmp("#include", n->string)) |
if ('\n' != *line && ' ' != *line) |
return 0; |
break; |
|
|
if ((n = n->next) == NULL || n->type != ROFFT_TEXT) |
|
return 0; |
|
|
/* |
/* |
* Read up until the next section into a buffer. |
* Strip away the enclosing angle brackets and make sure we're |
* Strip the leading and trailing newline from each read line, |
* not zero-length. |
* appending a trailing space. |
|
* Ignore empty (whitespace-only) lines. |
|
*/ |
*/ |
|
|
titlesz = 0; |
start = n->string; |
title = NULL; |
if ('<' == *start || '"' == *start) |
|
start++; |
|
|
while (NULL != (line = fgetln(stream, &len))) { |
if (0 == (sz = strlen(start))) |
if (' ' != *line || '\n' != line[(int)len - 1]) |
return 0; |
break; |
|
while (len > 0 && isspace((unsigned char)*line)) { |
|
line++; |
|
len--; |
|
} |
|
if (1 == len) |
|
continue; |
|
title = mandoc_realloc(title, titlesz + len); |
|
memcpy(title + titlesz, line, len); |
|
titlesz += len; |
|
title[(int)titlesz - 1] = ' '; |
|
} |
|
|
|
|
end = &start[(int)sz - 1]; |
|
if ('>' == *end || '"' == *end) |
|
end--; |
|
|
/* |
if (end > start) |
* If no page content can be found, or the input line |
putkeys(mpage, start, end - start + 1, TYPE_In); |
* is already the next section header, or there is no |
return 0; |
* trailing newline, reuse the page title as the page |
} |
* description. |
|
*/ |
|
|
|
if (NULL == title || '\0' == *title) { |
static void |
if (warnings) |
parse_mdoc_fname(struct mpage *mpage, const struct roff_node *n) |
fprintf(stderr, "%s: cannot find NAME section\n", |
{ |
of->fname); |
char *cp; |
buf_appendb(dbuf, buf->cp, buf->size); |
size_t sz; |
hash_put(hash, buf, TYPE_Nd); |
|
fclose(stream); |
if (n->type != ROFFT_TEXT) |
free(title); |
|
return; |
return; |
|
|
|
/* Skip function pointer punctuation. */ |
|
|
|
cp = n->string; |
|
while (*cp == '(' || *cp == '*') |
|
cp++; |
|
sz = strcspn(cp, "()"); |
|
|
|
putkeys(mpage, cp, sz, TYPE_Fn); |
|
if (n->sec == SEC_SYNOPSIS) |
|
putkeys(mpage, cp, sz, NAME_SYN); |
|
} |
|
|
|
static int |
|
parse_mdoc_Fn(struct mpage *mpage, const struct roff_meta *meta, |
|
const struct roff_node *n) |
|
{ |
|
uint64_t mask; |
|
|
|
if (n->child == NULL) |
|
return 0; |
|
|
|
parse_mdoc_fname(mpage, n->child); |
|
|
|
n = n->child->next; |
|
if (n != NULL && n->type == ROFFT_TEXT) { |
|
mask = TYPE_Fa; |
|
if (n->sec == SEC_SYNOPSIS) |
|
mask |= TYPE_Vt; |
|
putmdockey(mpage, n, mask, 0); |
} |
} |
|
|
title = mandoc_realloc(title, titlesz + 1); |
return 0; |
title[(int)titlesz] = '\0'; |
} |
|
|
/* |
static int |
* Skip to the first dash. |
parse_mdoc_Fo(struct mpage *mpage, const struct roff_meta *meta, |
* Use the remaining line as the description (no more than 70 |
const struct roff_node *n) |
* bytes). |
{ |
*/ |
|
|
|
if (NULL != (p = strstr(title, "- "))) { |
if (n->type != ROFFT_HEAD) |
for (p += 2; ' ' == *p || '\b' == *p; p++) |
return 1; |
/* Skip to next word. */ ; |
|
} else { |
if (n->child != NULL) |
if (warnings) |
parse_mdoc_fname(mpage, n->child); |
fprintf(stderr, "%s: no dash in title line\n", |
|
of->fname); |
return 0; |
p = title; |
} |
|
|
|
static int |
|
parse_mdoc_Va(struct mpage *mpage, const struct roff_meta *meta, |
|
const struct roff_node *n) |
|
{ |
|
char *cp; |
|
|
|
if (n->type != ROFFT_ELEM && n->type != ROFFT_BODY) |
|
return 0; |
|
|
|
if (n->child != NULL && |
|
n->child->next == NULL && |
|
n->child->type == ROFFT_TEXT) |
|
return 1; |
|
|
|
cp = NULL; |
|
deroff(&cp, n); |
|
if (cp != NULL) { |
|
putkey(mpage, cp, TYPE_Vt | (n->tok == MDOC_Va || |
|
n->type == ROFFT_BODY ? TYPE_Va : 0)); |
|
free(cp); |
} |
} |
|
|
plen = strlen(p); |
return 0; |
|
} |
|
|
/* Strip backspace-encoding from line. */ |
static int |
|
parse_mdoc_Xr(struct mpage *mpage, const struct roff_meta *meta, |
|
const struct roff_node *n) |
|
{ |
|
char *cp; |
|
|
while (NULL != (line = memchr(p, '\b', plen))) { |
if (NULL == (n = n->child)) |
len = line - p; |
return 0; |
if (0 == len) { |
|
memmove(line, line + 1, plen--); |
if (NULL == n->next) { |
continue; |
putkey(mpage, n->string, TYPE_Xr); |
} |
return 0; |
memmove(line - 1, line + 1, plen - len); |
|
plen -= 2; |
|
} |
} |
|
|
buf_appendb(dbuf, p, plen + 1); |
mandoc_asprintf(&cp, "%s(%s)", n->string, n->next->string); |
buf->len = 0; |
putkey(mpage, cp, TYPE_Xr); |
buf_appendb(buf, p, plen + 1); |
free(cp); |
hash_put(hash, buf, TYPE_Nd); |
return 0; |
fclose(stream); |
|
free(title); |
|
} |
} |
|
|
|
static int |
|
parse_mdoc_Nd(struct mpage *mpage, const struct roff_meta *meta, |
|
const struct roff_node *n) |
|
{ |
|
|
|
if (n->type == ROFFT_BODY) |
|
deroff(&mpage->desc, n); |
|
return 0; |
|
} |
|
|
|
static int |
|
parse_mdoc_Nm(struct mpage *mpage, const struct roff_meta *meta, |
|
const struct roff_node *n) |
|
{ |
|
|
|
if (SEC_NAME == n->sec) |
|
putmdockey(mpage, n->child, NAME_TITLE, 0); |
|
else if (n->sec == SEC_SYNOPSIS && n->type == ROFFT_HEAD) { |
|
if (n->child == NULL) |
|
putkey(mpage, meta->name, NAME_SYN); |
|
else |
|
putmdockey(mpage, n->child, NAME_SYN, 0); |
|
} |
|
if ( ! (mpage->name_head_done || |
|
n->child == NULL || n->child->string == NULL || |
|
strcasecmp(n->child->string, meta->title))) { |
|
putkey(mpage, n->child->string, NAME_HEAD); |
|
mpage->name_head_done = 1; |
|
} |
|
return 0; |
|
} |
|
|
|
static int |
|
parse_mdoc_Sh(struct mpage *mpage, const struct roff_meta *meta, |
|
const struct roff_node *n) |
|
{ |
|
|
|
return n->sec == SEC_CUSTOM && n->type == ROFFT_HEAD; |
|
} |
|
|
|
static int |
|
parse_mdoc_head(struct mpage *mpage, const struct roff_meta *meta, |
|
const struct roff_node *n) |
|
{ |
|
|
|
return n->type == ROFFT_HEAD; |
|
} |
|
|
|
/* |
|
* Add a string to the hash table for the current manual. |
|
* Each string has a bitmask telling which macros it belongs to. |
|
* When we finish the manual, we'll dump the table. |
|
*/ |
static void |
static void |
ofile_argbuild(int argc, char *argv[], struct of **of, |
putkeys(const struct mpage *mpage, char *cp, size_t sz, uint64_t v) |
const char *basedir) |
|
{ |
{ |
char buf[PATH_MAX]; |
struct ohash *htab; |
char pbuf[PATH_MAX]; |
struct str *s; |
const char *sec, *arch, *title; |
const char *end; |
char *relpath, *p; |
unsigned int slot; |
int i, src_form; |
int i, mustfree; |
struct of *nof; |
|
|
|
for (i = 0; i < argc; i++) { |
if (0 == sz) |
if (NULL == (relpath = realpath(argv[i], pbuf))) { |
return; |
perror(argv[i]); |
|
|
mustfree = render_string(&cp, &sz); |
|
|
|
if (TYPE_Nm & v) { |
|
htab = &names; |
|
v &= name_mask; |
|
if (v & NAME_FIRST) |
|
name_mask &= ~NAME_FIRST; |
|
if (debug > 1) |
|
say(mpage->mlinks->file, |
|
"Adding name %*s, bits=0x%llx", (int)sz, cp, |
|
(unsigned long long)v); |
|
} else { |
|
htab = &strings; |
|
if (debug > 1) |
|
for (i = 0; i < KEY_MAX; i++) |
|
if ((uint64_t)1 << i & v) |
|
say(mpage->mlinks->file, |
|
"Adding key %s=%*s", |
|
mansearch_keynames[i], (int)sz, cp); |
|
} |
|
|
|
end = cp + sz; |
|
slot = ohash_qlookupi(htab, cp, &end); |
|
s = ohash_find(htab, slot); |
|
|
|
if (NULL != s && mpage == s->mpage) { |
|
s->mask |= v; |
|
return; |
|
} else if (NULL == s) { |
|
s = mandoc_calloc(1, sizeof(struct str) + sz + 1); |
|
memcpy(s->key, cp, sz); |
|
ohash_insert(htab, slot, s); |
|
} |
|
s->mpage = mpage; |
|
s->mask = v; |
|
|
|
if (mustfree) |
|
free(cp); |
|
} |
|
|
|
/*
 * Encode the Unicode codepoint cp as UTF-8 into out.
 * The result is NUL-terminated, so out needs room for up to six
 * encoded bytes plus the terminator.
 * Returns the number of bytes stored before the terminator,
 * or 0 (leaving out untouched) if cp is larger than 0x7FFFFFFF.
 * Sequence lengths and bit patterns follow the original UTF-8
 * packaging, including the five- and six-byte forms.
 */
static size_t
utf8(unsigned int cp, char out[7])
{
	unsigned int	 lead;	/* Marker bits of the first byte. */
	size_t		 len;	/* Number of bytes in the sequence. */
	size_t		 pos;

	/* Plain ASCII is stored as it is. */

	if (cp <= 0x0000007F) {
		out[0] = (char)cp;
		out[1] = '\0';
		return 1;
	}

	/* Select the sequence length and the matching lead marker. */

	if (cp <= 0x000007FF) {
		len = 2;
		lead = 192;
	} else if (cp <= 0x0000FFFF) {
		len = 3;
		lead = 224;
	} else if (cp <= 0x001FFFFF) {
		len = 4;
		lead = 240;
	} else if (cp <= 0x03FFFFFF) {
		len = 5;
		lead = 248;
	} else if (cp <= 0x7FFFFFFF) {
		len = 6;
		lead = 252;
	} else
		return 0;

	/*
	 * Fill in the continuation bytes back to front, each one
	 * carrying the next six low-order bits of the codepoint.
	 * The bits remaining after that fit into the lead byte.
	 */

	for (pos = len - 1; pos > 0; pos--) {
		out[pos] = (cp & 63) | 128;
		cp >>= 6;
	}
	out[0] = cp | lead;
	out[len] = '\0';
	return len;
}
|
|
|
/* |
|
* If the string contains escape sequences, |
|
* replace it with an allocated rendering and return 1, |
|
* such that the caller can free it after use. |
|
* Otherwise, do nothing and return 0. |
|
*/ |
|
static int |
|
render_string(char **public, size_t *psz) |
|
{ |
|
const char *src, *scp, *addcp, *seq; |
|
char *dst; |
|
size_t ssz, dsz, addsz; |
|
char utfbuf[7], res[6]; |
|
int seqlen, unicode; |
|
|
|
res[0] = '\\'; |
|
res[1] = '\t'; |
|
res[2] = ASCII_NBRSP; |
|
res[3] = ASCII_HYPH; |
|
res[4] = ASCII_BREAK; |
|
res[5] = '\0'; |
|
|
|
src = scp = *public; |
|
ssz = *psz; |
|
dst = NULL; |
|
dsz = 0; |
|
|
|
while (scp < src + *psz) { |
|
|
|
/* Leave normal characters unchanged. */ |
|
|
|
if (strchr(res, *scp) == NULL) { |
|
if (dst != NULL) |
|
dst[dsz++] = *scp; |
|
scp++; |
continue; |
continue; |
} |
} |
if (NULL != basedir) { |
|
if (strstr(pbuf, basedir) != pbuf) { |
|
fprintf(stderr, "%s: file outside " |
|
"base directory %s\n", |
|
pbuf, basedir); |
|
continue; |
|
} |
|
relpath = pbuf + strlen(basedir); |
|
} |
|
|
|
/* |
/* |
* Try to infer the manual section, architecture and |
* Found something that requires replacing, |
* page title from the path, assuming it looks like |
* make sure we have a destination buffer. |
* man*[/<arch>]/<title>.<section> or |
|
* cat<section>[/<arch>]/<title>.0 |
|
*/ |
*/ |
|
|
if (strlcpy(buf, relpath, sizeof(buf)) >= sizeof(buf)) { |
if (dst == NULL) { |
fprintf(stderr, "%s: path too long\n", relpath); |
dst = mandoc_malloc(ssz + 1); |
continue; |
dsz = scp - src; |
|
memcpy(dst, src, dsz); |
} |
} |
sec = arch = title = ""; |
|
src_form = 0; |
/* Handle single-char special characters. */ |
p = strrchr(buf, '\0'); |
|
while (p-- > buf) { |
switch (*scp) { |
if ('\0' == *sec && '.' == *p) { |
case '\\': |
sec = p + 1; |
|
*p = '\0'; |
|
if ('0' == *sec) |
|
src_form |= MANDOC_FORM; |
|
else if ('1' <= *sec && '9' >= *sec) |
|
src_form |= MANDOC_SRC; |
|
continue; |
|
} |
|
if ('/' != *p) |
|
continue; |
|
if ('\0' == *title) { |
|
title = p + 1; |
|
*p = '\0'; |
|
continue; |
|
} |
|
if (0 == strncmp("man", p + 1, 3)) |
|
src_form |= MANDOC_SRC; |
|
else if (0 == strncmp("cat", p + 1, 3)) |
|
src_form |= MANDOC_FORM; |
|
else |
|
arch = p + 1; |
|
break; |
break; |
|
case '\t': |
|
case ASCII_NBRSP: |
|
dst[dsz++] = ' '; |
|
scp++; |
|
continue; |
|
case ASCII_HYPH: |
|
dst[dsz++] = '-'; |
|
/* FALLTHROUGH */ |
|
case ASCII_BREAK: |
|
scp++; |
|
continue; |
|
default: |
|
abort(); |
} |
} |
if ('\0' == *title) { |
|
if (warnings) |
|
fprintf(stderr, |
|
"%s: cannot deduce title " |
|
"from filename\n", |
|
relpath); |
|
title = buf; |
|
} |
|
|
|
/* |
/* |
* Build the file structure. |
* Found an escape sequence. |
|
* Read past the slash, then parse it. |
|
* Ignore everything except characters. |
*/ |
*/ |
|
|
nof = mandoc_calloc(1, sizeof(struct of)); |
scp++; |
nof->fname = mandoc_strdup(relpath); |
if (mandoc_escape(&scp, &seq, &seqlen) != ESCAPE_SPECIAL) |
nof->sec = mandoc_strdup(sec); |
continue; |
nof->arch = mandoc_strdup(arch); |
|
nof->title = mandoc_strdup(title); |
|
nof->src_form = src_form; |
|
|
|
/* |
/* |
* Add the structure to the list. |
* Render the special character |
|
* as either UTF-8 or ASCII. |
*/ |
*/ |
|
|
if (NULL == *of) { |
if (write_utf8) { |
*of = nof; |
unicode = mchars_spec2cp(seq, seqlen); |
(*of)->first = nof; |
if (unicode <= 0) |
|
continue; |
|
addsz = utf8(unicode, utfbuf); |
|
if (addsz == 0) |
|
continue; |
|
addcp = utfbuf; |
} else { |
} else { |
nof->first = (*of)->first; |
addcp = mchars_spec2str(seq, seqlen, &addsz); |
(*of)->next = nof; |
if (addcp == NULL) |
*of = nof; |
continue; |
|
if (*addcp == ASCII_NBRSP) { |
|
addcp = " "; |
|
addsz = 1; |
|
} |
} |
} |
|
|
|
/* Copy the rendered glyph into the stream. */ |
|
|
|
ssz += addsz; |
|
dst = mandoc_realloc(dst, ssz + 1); |
|
memcpy(dst + dsz, addcp, addsz); |
|
dsz += addsz; |
} |
} |
|
if (dst != NULL) { |
|
*public = dst; |
|
*psz = dsz; |
|
} |
|
|
|
/* Trim trailing whitespace and NUL-terminate. */ |
|
|
|
while (*psz > 0 && (*public)[*psz - 1] == ' ') |
|
--*psz; |
|
if (dst != NULL) { |
|
(*public)[*psz] = '\0'; |
|
return 1; |
|
} else |
|
return 0; |
} |
} |
|
|
|
static void |
|
dbadd_mlink(const struct mlink *mlink) |
|
{ |
|
dba_page_alias(mlink->mpage->dba, mlink->name, NAME_FILE); |
|
dba_page_add(mlink->mpage->dba, DBP_SECT, mlink->dsec); |
|
dba_page_add(mlink->mpage->dba, DBP_SECT, mlink->fsec); |
|
dba_page_add(mlink->mpage->dba, DBP_ARCH, mlink->arch); |
|
dba_page_add(mlink->mpage->dba, DBP_FILE, mlink->file); |
|
} |
|
|
/* |
/* |
* Recursively build up a list of files to parse. |
* Flush the current page's terms (and their bits) into the database. |
* We use this instead of ftw() and so on because I don't want global |
* Also, handle escape sequences at the last possible moment. |
* variables hanging around. |
|
* This ignores the mandoc.db and mandoc.index files, but assumes that |
|
* everything else is a manual. |
|
* Pass in a pointer to a NULL structure for the first invocation. |
|
*/ |
*/ |
static void |
static void |
ofile_dirbuild(const char *dir, const char* psec, const char *parch, |
dbadd(struct dba *dba, struct mpage *mpage) |
int p_src_form, struct of **of) |
|
{ |
{ |
char buf[PATH_MAX]; |
struct mlink *mlink; |
#if defined(__sun) |
struct str *key; |
struct stat sb; |
char *cp; |
#endif |
uint64_t mask; |
size_t sz; |
size_t i; |
DIR *d; |
unsigned int slot; |
const char *fn, *sec, *arch; |
int mustfree; |
char *p, *q, *suffix; |
|
struct of *nof; |
|
struct dirent *dp; |
|
int src_form; |
|
|
|
if (NULL == (d = opendir(dir))) { |
mlink = mpage->mlinks; |
if (warnings) |
|
perror(dir); |
if (nodb) { |
|
for (key = ohash_first(&names, &slot); NULL != key; |
|
key = ohash_next(&names, &slot)) |
|
free(key); |
|
for (key = ohash_first(&strings, &slot); NULL != key; |
|
key = ohash_next(&strings, &slot)) |
|
free(key); |
|
if (0 == debug) |
|
return; |
|
while (NULL != mlink) { |
|
fputs(mlink->name, stdout); |
|
if (NULL == mlink->next || |
|
strcmp(mlink->dsec, mlink->next->dsec) || |
|
strcmp(mlink->fsec, mlink->next->fsec) || |
|
strcmp(mlink->arch, mlink->next->arch)) { |
|
putchar('('); |
|
if ('\0' == *mlink->dsec) |
|
fputs(mlink->fsec, stdout); |
|
else |
|
fputs(mlink->dsec, stdout); |
|
if ('\0' != *mlink->arch) |
|
printf("/%s", mlink->arch); |
|
putchar(')'); |
|
} |
|
mlink = mlink->next; |
|
if (NULL != mlink) |
|
fputs(", ", stdout); |
|
} |
|
printf(" - %s\n", mpage->desc); |
return; |
return; |
} |
} |
|
|
while (NULL != (dp = readdir(d))) { |
if (debug) |
fn = dp->d_name; |
say(mlink->file, "Adding to database"); |
|
|
if ('.' == *fn) |
cp = mpage->desc; |
continue; |
i = strlen(cp); |
|
mustfree = render_string(&cp, &i); |
|
mpage->dba = dba_page_new(dba->pages, |
|
*mpage->arch == '\0' ? mlink->arch : mpage->arch, |
|
cp, mlink->file, mpage->form); |
|
if (mustfree) |
|
free(cp); |
|
dba_page_add(mpage->dba, DBP_SECT, mpage->sec); |
|
|
src_form = p_src_form; |
while (mlink != NULL) { |
|
dbadd_mlink(mlink); |
|
mlink = mlink->next; |
|
} |
|
|
#if defined(__sun) |
for (key = ohash_first(&names, &slot); NULL != key; |
stat(dp->d_name, &sb); |
key = ohash_next(&names, &slot)) { |
if (S_IFDIR & sb.st_mode) { |
assert(key->mpage == mpage); |
#else |
dba_page_alias(mpage->dba, key->key, key->mask); |
if (DT_DIR == dp->d_type) { |
free(key); |
#endif |
} |
sec = psec; |
for (key = ohash_first(&strings, &slot); NULL != key; |
arch = parch; |
key = ohash_next(&strings, &slot)) { |
|
assert(key->mpage == mpage); |
|
i = 0; |
|
for (mask = TYPE_Xr; mask <= TYPE_Lb; mask *= 2) { |
|
if (key->mask & mask) |
|
dba_macro_add(dba->macros, i, |
|
key->key, mpage->dba); |
|
i++; |
|
} |
|
free(key); |
|
} |
|
} |
|
|
/* |
static void |
* By default, only use directories called: |
dbprune(struct dba *dba) |
* man<section>/[<arch>/] or |
{ |
* cat<section>/[<arch>/] |
struct dba_array *page, *files; |
*/ |
char *file; |
|
|
if ('\0' == *sec) { |
dba_array_FOREACH(dba->pages, page) { |
if(0 == strncmp("man", fn, 3)) { |
files = dba_array_get(page, DBP_FILE); |
src_form |= MANDOC_SRC; |
dba_array_FOREACH(files, file) { |
sec = fn + 3; |
if (*file < ' ') |
} else if (0 == strncmp("cat", fn, 3)) { |
file++; |
src_form |= MANDOC_FORM; |
if (ohash_find(&mlinks, ohash_qlookup(&mlinks, |
sec = fn + 3; |
file)) != NULL) { |
} else { |
if (debug) |
if (warnings) fprintf(stderr, |
say(file, "Deleting from database"); |
"%s/%s: bad section\n", |
dba_array_del(dba->pages); |
dir, fn); |
break; |
if (use_all) |
|
sec = fn; |
|
else |
|
continue; |
|
} |
|
} else if ('\0' == *arch) { |
|
if (NULL != strchr(fn, '.')) { |
|
if (warnings) fprintf(stderr, |
|
"%s/%s: bad architecture\n", |
|
dir, fn); |
|
if (0 == use_all) |
|
continue; |
|
} |
|
arch = fn; |
|
} else { |
|
if (warnings) fprintf(stderr, "%s/%s: " |
|
"excessive subdirectory\n", dir, fn); |
|
if (0 == use_all) |
|
continue; |
|
} |
} |
|
} |
|
} |
|
} |
|
|
buf[0] = '\0'; |
/* |
strlcat(buf, dir, PATH_MAX); |
* Write the database from memory to disk. |
strlcat(buf, "/", PATH_MAX); |
*/ |
sz = strlcat(buf, fn, PATH_MAX); |
static void |
|
dbwrite(struct dba *dba) |
|
{ |
|
struct stat sb1, sb2; |
|
char tfn[33], *cp1, *cp2; |
|
off_t i; |
|
int fd1, fd2; |
|
|
if (PATH_MAX <= sz) { |
/* |
if (warnings) fprintf(stderr, "%s/%s: " |
* Do not write empty databases, and delete existing ones |
"path too long\n", dir, fn); |
* when makewhatis -u causes them to become empty. |
continue; |
*/ |
} |
|
|
|
ofile_dirbuild(buf, sec, arch, src_form, of); |
dba_array_start(dba->pages); |
continue; |
if (dba_array_next(dba->pages) == NULL) { |
} |
if (unlink(MANDOC_DB) == -1 && errno != ENOENT) |
|
say(MANDOC_DB, "&unlink"); |
|
return; |
|
} |
|
|
#if defined(__sun) |
/* |
if (0 == S_IFREG & sb.st_mode) { |
* Build the database in a temporary file, |
#else |
* then atomically move it into place. |
if (DT_REG != dp->d_type) { |
*/ |
#endif |
|
if (warnings) |
if (dba_write(MANDOC_DB "~", dba) != -1) { |
fprintf(stderr, |
if (rename(MANDOC_DB "~", MANDOC_DB) == -1) { |
"%s/%s: not a regular file\n", |
exitcode = (int)MANDOCLEVEL_SYSERR; |
dir, fn); |
say(MANDOC_DB, "&rename"); |
continue; |
unlink(MANDOC_DB "~"); |
} |
} |
if (!strcmp(MANDOC_DB, fn) || !strcmp(MANDOC_IDX, fn)) |
return; |
continue; |
} |
if ('\0' == *psec) { |
|
if (warnings) |
|
fprintf(stderr, |
|
"%s/%s: file outside section\n", |
|
dir, fn); |
|
if (0 == use_all) |
|
continue; |
|
} |
|
|
|
/* |
/* |
* By default, skip files where the file name suffix |
* We lack write permission and cannot replace the database |
* does not agree with the section directory |
* file, but let's at least check whether the data changed. |
* they are located in. |
*/ |
*/ |
|
|
|
suffix = strrchr(fn, '.'); |
(void)strlcpy(tfn, "/tmp/mandocdb.XXXXXXXX", sizeof(tfn)); |
if (NULL == suffix) { |
if (mkdtemp(tfn) == NULL) { |
if (warnings) |
exitcode = (int)MANDOCLEVEL_SYSERR; |
fprintf(stderr, |
say("", "&%s", tfn); |
"%s/%s: no filename suffix\n", |
return; |
dir, fn); |
} |
if (0 == use_all) |
cp1 = cp2 = MAP_FAILED; |
continue; |
fd1 = fd2 = -1; |
} else if ((MANDOC_SRC & src_form && |
(void)strlcat(tfn, "/" MANDOC_DB, sizeof(tfn)); |
strcmp(suffix + 1, psec)) || |
if (dba_write(tfn, dba) == -1) { |
(MANDOC_FORM & src_form && |
say(tfn, "&dba_write"); |
strcmp(suffix + 1, "0"))) { |
goto err; |
if (warnings) |
} |
fprintf(stderr, |
if ((fd1 = open(MANDOC_DB, O_RDONLY)) == -1) { |
"%s/%s: wrong filename suffix\n", |
say(MANDOC_DB, "&open"); |
dir, fn); |
goto err; |
if (0 == use_all) |
} |
continue; |
if ((fd2 = open(tfn, O_RDONLY)) == -1) { |
if ('0' == suffix[1]) |
say(tfn, "&open"); |
src_form |= MANDOC_FORM; |
goto err; |
else if ('1' <= suffix[1] && '9' >= suffix[1]) |
} |
src_form |= MANDOC_SRC; |
if (fstat(fd1, &sb1) == -1) { |
} |
say(MANDOC_DB, "&fstat"); |
|
goto err; |
|
} |
|
if (fstat(fd2, &sb2) == -1) { |
|
say(tfn, "&fstat"); |
|
goto err; |
|
} |
|
if (sb1.st_size != sb2.st_size) |
|
goto err; |
|
if ((cp1 = mmap(NULL, sb1.st_size, PROT_READ, MAP_PRIVATE, |
|
fd1, 0)) == MAP_FAILED) { |
|
say(MANDOC_DB, "&mmap"); |
|
goto err; |
|
} |
|
if ((cp2 = mmap(NULL, sb2.st_size, PROT_READ, MAP_PRIVATE, |
|
fd2, 0)) == MAP_FAILED) { |
|
say(tfn, "&mmap"); |
|
goto err; |
|
} |
|
for (i = 0; i < sb1.st_size; i++) |
|
if (cp1[i] != cp2[i]) |
|
goto err; |
|
goto out; |
|
|
/* |
err: |
* Skip formatted manuals if a source version is |
exitcode = (int)MANDOCLEVEL_SYSERR; |
* available. Ignore the age: it is very unlikely |
say(MANDOC_DB, "Data changed, but cannot replace database"); |
* that people install newer formatted base manuals |
|
* when they used to have source manuals before, |
|
* and in ports, old manuals get removed on update. |
|
*/ |
|
if (0 == use_all && MANDOC_FORM & src_form && |
|
'\0' != *psec) { |
|
buf[0] = '\0'; |
|
strlcat(buf, dir, PATH_MAX); |
|
p = strrchr(buf, '/'); |
|
if ('\0' != *parch && NULL != p) |
|
for (p--; p > buf; p--) |
|
if ('/' == *p) |
|
break; |
|
if (NULL == p) |
|
p = buf; |
|
else |
|
p++; |
|
if (0 == strncmp("cat", p, 3)) |
|
memcpy(p, "man", 3); |
|
strlcat(buf, "/", PATH_MAX); |
|
sz = strlcat(buf, fn, PATH_MAX); |
|
if (sz >= PATH_MAX) { |
|
if (warnings) fprintf(stderr, |
|
"%s/%s: path too long\n", |
|
dir, fn); |
|
continue; |
|
} |
|
q = strrchr(buf, '.'); |
|
if (NULL != q && p < q++) { |
|
*q = '\0'; |
|
sz = strlcat(buf, psec, PATH_MAX); |
|
if (sz >= PATH_MAX) { |
|
if (warnings) fprintf(stderr, |
|
"%s/%s: path too long\n", |
|
dir, fn); |
|
continue; |
|
} |
|
if (0 == access(buf, R_OK)) |
|
continue; |
|
} |
|
} |
|
|
|
buf[0] = '\0'; |
out: |
assert('.' == dir[0]); |
if (cp1 != MAP_FAILED) |
if ('/' == dir[1]) { |
munmap(cp1, sb1.st_size); |
strlcat(buf, dir + 2, PATH_MAX); |
if (cp2 != MAP_FAILED) |
strlcat(buf, "/", PATH_MAX); |
munmap(cp2, sb2.st_size); |
} |
if (fd1 != -1) |
sz = strlcat(buf, fn, PATH_MAX); |
close(fd1); |
if (sz >= PATH_MAX) { |
if (fd2 != -1) |
if (warnings) fprintf(stderr, |
close(fd2); |
"%s/%s: path too long\n", dir, fn); |
unlink(tfn); |
continue; |
*strrchr(tfn, '/') = '\0'; |
} |
rmdir(tfn); |
|
} |
|
|
nof = mandoc_calloc(1, sizeof(struct of)); |
static int |
nof->fname = mandoc_strdup(buf); |
set_basedir(const char *targetdir, int report_baddir) |
nof->sec = mandoc_strdup(psec); |
{ |
nof->arch = mandoc_strdup(parch); |
static char startdir[PATH_MAX]; |
nof->src_form = src_form; |
static int getcwd_status; /* 1 = ok, 2 = failure */ |
|
static int chdir_status; /* 1 = changed directory */ |
|
|
/* |
/* |
* Remember the file name without the extension, |
* Remember the original working directory, if possible. |
* to be used as the page title in the database. |
* This will be needed if the second or a later directory |
*/ |
* on the command line is given as a relative path. |
|
* Do not error out if the current directory is not |
|
* searchable: Maybe it won't be needed after all. |
|
*/ |
|
if (getcwd_status == 0) { |
|
if (getcwd(startdir, sizeof(startdir)) == NULL) { |
|
getcwd_status = 2; |
|
(void)strlcpy(startdir, strerror(errno), |
|
sizeof(startdir)); |
|
} else |
|
getcwd_status = 1; |
|
} |
|
|
if (NULL != suffix) |
/* |
*suffix = '\0'; |
* We are leaving the old base directory. |
nof->title = mandoc_strdup(fn); |
* Do not use it any longer, not even for messages. |
|
*/ |
|
*basedir = '\0'; |
|
basedir_len = 0; |
|
|
/* |
/* |
* Add the structure to the list. |
* If and only if the directory was changed earlier and |
*/ |
* the next directory to process is given as a relative path, |
|
* first go back, or bail out if that is impossible. |
|
*/ |
|
if (chdir_status && *targetdir != '/') { |
|
if (getcwd_status == 2) { |
|
exitcode = (int)MANDOCLEVEL_SYSERR; |
|
say("", "getcwd: %s", startdir); |
|
return 0; |
|
} |
|
if (chdir(startdir) == -1) { |
|
exitcode = (int)MANDOCLEVEL_SYSERR; |
|
say("", "&chdir %s", startdir); |
|
return 0; |
|
} |
|
} |
|
|
if (NULL == *of) { |
/* |
*of = nof; |
* Always resolve basedir to the canonicalized absolute |
(*of)->first = nof; |
* pathname and append a trailing slash, such that |
} else { |
* we can reliably check whether files are inside. |
nof->first = (*of)->first; |
*/ |
(*of)->next = nof; |
if (realpath(targetdir, basedir) == NULL) { |
*of = nof; |
if (report_baddir || errno != ENOENT) { |
|
exitcode = (int)MANDOCLEVEL_BADARG; |
|
say("", "&%s: realpath", targetdir); |
} |
} |
|
*basedir = '\0'; |
|
return 0; |
|
} else if (chdir(basedir) == -1) { |
|
if (report_baddir || errno != ENOENT) { |
|
exitcode = (int)MANDOCLEVEL_BADARG; |
|
say("", "&chdir"); |
|
} |
|
*basedir = '\0'; |
|
return 0; |
} |
} |
|
chdir_status = 1; |
|
basedir_len = strlen(basedir); |
|
if (basedir[basedir_len - 1] != '/') { |
|
if (basedir_len >= PATH_MAX - 1) { |
|
exitcode = (int)MANDOCLEVEL_SYSERR; |
|
say("", "Filename too long"); |
|
*basedir = '\0'; |
|
basedir_len = 0; |
|
return 0; |
|
} |
|
basedir[basedir_len++] = '/'; |
|
basedir[basedir_len] = '\0'; |
|
} |
|
return 1; |
|
} |
|
|
closedir(d); |
#ifdef READ_ALLOWED_PATH |
|
static int |
|
read_allowed(const char *candidate) |
|
{ |
|
const char *cp; |
|
size_t len; |
|
|
|
for (cp = READ_ALLOWED_PATH;; cp += len) { |
|
while (*cp == ':') |
|
cp++; |
|
if (*cp == '\0') |
|
return 0; |
|
len = strcspn(cp, ":"); |
|
if (strncmp(candidate, cp, len) == 0) |
|
return 1; |
|
} |
} |
} |
|
#endif |
|
|
static void |
static void |
ofile_free(struct of *of) |
say(const char *file, const char *format, ...) |
{ |
{ |
struct of *nof; |
va_list ap; |
|
int use_errno; |
|
|
if (NULL != of) |
if (*basedir != '\0') |
of = of->first; |
fprintf(stderr, "%s", basedir); |
|
if (*basedir != '\0' && *file != '\0') |
|
fputc('/', stderr); |
|
if (*file != '\0') |
|
fprintf(stderr, "%s", file); |
|
|
while (NULL != of) { |
use_errno = 1; |
nof = of->next; |
if (format != NULL) { |
free(of->fname); |
switch (*format) { |
free(of->sec); |
case '&': |
free(of->arch); |
format++; |
free(of->title); |
break; |
free(of); |
case '\0': |
of = nof; |
format = NULL; |
|
break; |
|
default: |
|
use_errno = 0; |
|
break; |
|
} |
} |
} |
|
if (format != NULL) { |
|
if (*basedir != '\0' || *file != '\0') |
|
fputs(": ", stderr); |
|
va_start(ap, format); |
|
vfprintf(stderr, format, ap); |
|
va_end(ap); |
|
} |
|
if (use_errno) { |
|
if (*basedir != '\0' || *file != '\0' || format != NULL) |
|
fputs(": ", stderr); |
|
perror(NULL); |
|
} else |
|
fputc('\n', stderr); |
} |
} |