=================================================================== RCS file: /cvs/mandoc/Attic/makewhatis.c,v retrieving revision 1.1 retrieving revision 1.6 diff -u -p -r1.1 -r1.6 --- mandoc/Attic/makewhatis.c 2011/05/13 00:42:26 1.1 +++ mandoc/Attic/makewhatis.c 2011/06/21 14:16:05 1.6 @@ -1,4 +1,4 @@ -/* $Id: makewhatis.c,v 1.1 2011/05/13 00:42:26 kristaps Exp $ */ +/* $Id: makewhatis.c,v 1.6 2011/06/21 14:16:05 kristaps Exp $ */ /* * Copyright (c) 2011 Kristaps Dzonsons * @@ -42,42 +42,42 @@ #define MANDOC_BUFSZ BUFSIZ #define MANDOC_FLAGS O_CREAT|O_TRUNC|O_RDWR -enum type { - MANDOC_NONE = 0, - MANDOC_NAME, - MANDOC_FUNCTION, - MANDOC_UTILITY, - MANDOC_INCLUDES, - MANDOC_VARIABLE, - MANDOC_STANDARD, - MANDOC_AUTHOR, - MANDOC_CONFIG +/* Bit-fields. See makewhatis.1. */ + +#define TYPE_NAME 0x01 +#define TYPE_FUNCTION 0x02 +#define TYPE_UTILITY 0x04 +#define TYPE_INCLUDES 0x08 +#define TYPE_VARIABLE 0x10 +#define TYPE_STANDARD 0x20 +#define TYPE_AUTHOR 0x40 +#define TYPE_CONFIG 0x80 +#define TYPE_DESC 0x100 + +/* Buffer for storing growable data. */ + +struct buf { + char *cp; + size_t len; + size_t size; }; -#define MAN_ARGS DB *db, \ - const char *dbn, \ - DBT *key, size_t *ksz, \ - DBT *val, \ - DBT *rval, size_t *rsz, \ +#define MAN_ARGS DB *hash, \ + struct buf *buf, \ + struct buf *dbuf, \ const struct man_node *n -#define MDOC_ARGS DB *db, \ - const char *dbn, \ - DBT *key, size_t *ksz, \ - DBT *val, \ - DBT *rval, size_t *rsz, \ - const struct mdoc_node *n +#define MDOC_ARGS DB *hash, \ + struct buf *buf, \ + struct buf *dbuf, \ + const struct mdoc_node *n, \ + const struct mdoc_meta *m -static void dbt_append(DBT *, size_t *, const char *); -static void dbt_appendb(DBT *, size_t *, +static void buf_append(struct buf *, const char *); +static void buf_appendb(struct buf *, const void *, size_t); -static void dbt_init(DBT *, size_t *); static void dbt_put(DB *, const char *, DBT *, DBT *); -static void usage(void); -static void pman(DB *, const char *, DBT *, size_t *, - DBT *, DBT *, size_t *, struct man *); +static void hash_put(DB *, const struct buf *, int); static int pman_node(MAN_ARGS); -static void pmdoc(DB *, const char *, DBT *, size_t *, - DBT *, DBT *, size_t *, struct mdoc *); static void pmdoc_node(MDOC_ARGS); static void pmdoc_An(MDOC_ARGS); static void pmdoc_Cd(MDOC_ARGS); @@ -89,11 +89,10 @@ static void pmdoc_Nd(MDOC_ARGS); static void pmdoc_Nm(MDOC_ARGS); static void pmdoc_St(MDOC_ARGS); static void pmdoc_Vt(MDOC_ARGS); +static void usage(void); typedef void (*pmdoc_nf)(MDOC_ARGS); -static const char *progname; - static const pmdoc_nf mdocs[MDOC_MAX] = { NULL, /* Ap */ NULL, /* Dd */ @@ -219,6 +218,8 @@ static const pmdoc_nf mdocs[MDOC_MAX] = { NULL, /* Ta */ }; +static const char *progname; + int main(int argc, char *argv[]) { @@ -233,17 +234,18 @@ main(int argc, char *argv[]) char ibuf[MAXPATHLEN], /* index fname */ ibbuf[MAXPATHLEN], /* index backup fname */ fbuf[MAXPATHLEN], /* btree fname */ - fbbuf[MAXPATHLEN]; /* btree backup fname */ - int ch; + fbbuf[MAXPATHLEN], /* btree backup fname */ + vbuf[8]; /* stringified record number */ + int ch, seq; DB *idx, /* index database */ - *db; /* keyword database */ - DBT rkey, rval, /* recno entries */ - key, val; /* persistent keyword entries */ - size_t sv, - ksz, rsz; /* entry buffer size */ - char vbuf[8]; /* stringified record number */ + *db, /* keyword database */ + *hash; /* temporary keyword hashtable */ + DBT key, val; + size_t sv, rsz; BTREEINFO info; /* btree configuration */ recno_t rec; /* current record number */ + struct buf buf, /* keyword buffer */ + dbuf; /* description buffer */ extern int optind; extern char *optarg; @@ -302,16 +304,25 @@ main(int argc, char *argv[]) /* * For the keyword database, open a BTREE database that allows - * duplicates. For the index database, use a standard RECNO - * database type. + * duplicates. + * For the index database, use a standard RECNO database type. + * For the temporary keyword hashtable, use the HASH database + * type. */ + hash = dbopen(NULL, MANDOC_FLAGS, 0644, DB_HASH, NULL); + if (NULL == hash) { + perror("hash"); + exit((int)MANDOCLEVEL_SYSERR); + } + memset(&info, 0, sizeof(BTREEINFO)); info.flags = R_DUP; db = dbopen(fbbuf, MANDOC_FLAGS, 0644, DB_BTREE, &info); if (NULL == db) { perror(fbbuf); + (*hash->close)(hash); exit((int)MANDOCLEVEL_SYSERR); } @@ -320,6 +331,7 @@ main(int argc, char *argv[]) if (NULL == db) { perror(ibbuf); (*db->close)(db); + (*hash->close)(hash); exit((int)MANDOCLEVEL_SYSERR); } @@ -332,18 +344,17 @@ main(int argc, char *argv[]) mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL); - memset(&key, 0, sizeof(DBT)); - memset(&val, 0, sizeof(DBT)); - memset(&rkey, 0, sizeof(DBT)); - memset(&rval, 0, sizeof(DBT)); + rec = 1; + rsz = 0; - val.size = sizeof(vbuf); - val.data = vbuf; - rkey.size = sizeof(recno_t); + memset(&buf, 0, sizeof(struct buf)); + memset(&dbuf, 0, sizeof(struct buf)); - rec = 1; - ksz = rsz = 0; + buf.size = dbuf.size = MANDOC_BUFSZ; + buf.cp = mandoc_malloc(buf.size); + dbuf.cp = mandoc_malloc(dbuf.size); + while (NULL != (fn = *argv++)) { mparse_reset(mp); @@ -378,49 +389,76 @@ main(int argc, char *argv[]) * going to write a nil byte in its place. */ - dbt_init(&rval, &rsz); - dbt_appendb(&rval, &rsz, fn, strlen(fn) + 1); - dbt_appendb(&rval, &rsz, msec, strlen(msec) + 1); - dbt_appendb(&rval, &rsz, mtitle, strlen(mtitle) + 1); - dbt_appendb(&rval, &rsz, arch ? arch : "", + dbuf.len = 0; + buf_appendb(&dbuf, fn, strlen(fn) + 1); + buf_appendb(&dbuf, msec, strlen(msec) + 1); + buf_appendb(&dbuf, mtitle, strlen(mtitle) + 1); + buf_appendb(&dbuf, arch ? arch : "", arch ? strlen(arch) + 1 : 1); - sv = rval.size; + sv = dbuf.len; /* Fix the record number in the btree value. */ - memset(val.data, 0, sizeof(uint32_t)); - memcpy(val.data + 4, &rec, sizeof(uint32_t)); - if (mdoc) - pmdoc(db, fbbuf, &key, &ksz, - &val, &rval, &rsz, mdoc); + pmdoc_node(hash, &buf, &dbuf, + mdoc_node(mdoc), mdoc_meta(mdoc)); else - pman(db, fbbuf, &key, &ksz, - &val, &rval, &rsz, man); + pman_node(hash, &buf, &dbuf, man_node(man)); + + /* + * Copy from the in-memory hashtable of pending keywords + * into the database. + */ + memset(vbuf, 0, sizeof(uint32_t)); + memcpy(vbuf + 4, &rec, sizeof(uint32_t)); + + seq = R_FIRST; + while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) { + memcpy(vbuf, val.data, sizeof(uint32_t)); + val.size = sizeof(vbuf); + val.data = vbuf; + dbt_put(db, fbbuf, &key, &val); + + if ((*hash->del)(hash, &key, 0) < 0) { + perror("hash"); + exit((int)MANDOCLEVEL_SYSERR); + } + seq = R_NEXT; + } + + if (ch < 0) { + perror("hash"); + exit((int)MANDOCLEVEL_SYSERR); + } + /* - * Apply this to the index. If we haven't had a - * description set, put an empty one in now. + * Apply to the index. If we haven't had a description + * set, put an empty one in now. */ - if (rval.size == sv) - dbt_appendb(&rval, &rsz, "", 1); + if (dbuf.len == sv) + buf_appendb(&dbuf, "", 1); - rkey.data = &rec; - dbt_put(idx, ibbuf, &rkey, &rval); + key.data = &rec; + key.size = sizeof(recno_t); - printf("Indexed: %s\n", fn); + val.data = dbuf.cp; + val.size = dbuf.len; + + dbt_put(idx, ibbuf, &key, &val); rec++; } (*db->close)(db); (*idx->close)(idx); + (*hash->close)(hash); mparse_free(mp); - free(key.data); - free(rval.data); + free(buf.cp); + free(dbuf.cp); /* Atomically replace the file with our temporary one. */ @@ -433,130 +471,62 @@ main(int argc, char *argv[]) } /* - * Initialise the stored database key whose data buffer is shared - * between uses (as the key must sometimes be constructed from an array - * of + * Grow the buffer (if necessary) and copy in a binary string. */ static void -dbt_init(DBT *key, size_t *ksz) +buf_appendb(struct buf *buf, const void *cp, size_t sz) { - if (0 == *ksz) { - assert(0 == key->size); - assert(NULL == key->data); - key->data = mandoc_malloc(MANDOC_BUFSZ); - *ksz = MANDOC_BUFSZ; - } - - key->size = 0; -} - -/* - * Append a binary value to a database entry. This can be invoked - * multiple times; the buffer is automatically resized. - */ -static void -dbt_appendb(DBT *key, size_t *ksz, const void *cp, size_t sz) -{ - - assert(key->data); - /* Overshoot by MANDOC_BUFSZ. */ - while (key->size + sz >= *ksz) { - *ksz = key->size + sz + MANDOC_BUFSZ; - key->data = mandoc_realloc(key->data, *ksz); + while (buf->len + sz >= buf->size) { + buf->size = buf->len + sz + MANDOC_BUFSZ; + buf->cp = mandoc_realloc(buf->cp, buf->size); } -#if 0 - dstp = key->data + (int)key->size; - - while (NULL != (endp = memchr(cp, '\\', sz))) { - ssz = endp - cp; - memcpy(dstp, cp, ssz); - - dstp += ssz; - key->size += ssz; - sz -= ssz; - - cp = endp++; - /* FIXME: expects nil-terminated string! */ - esc = mandoc_escape((const char **)&endp, NULL, NULL); - - switch (esc) { - case (ESCAPE_ERROR): - /* Nil-terminate this point. */ - memcpy(dstp, "", 1); - key->size++; - return; - case (ESCAPE_PREDEF): - /* FALLTHROUGH */ - case (ESCAPE_SPECIAL): - break; - default: - sz -= endp - cp; - cp = endp; - continue; - } - - ssz = endp - cp; - memcpy(dstp, cp, ssz); - - dstp += ssz; - key->size += ssz; - sz -= ssz; - - cp = endp; - } -#endif - - memcpy(key->data + (int)key->size, cp, sz); - key->size += sz; + memcpy(buf->cp + (int)buf->len, cp, sz); + buf->len += sz; } /* - * Append a nil-terminated string to the database entry. This can be - * invoked multiple times. The database entry will be nil-terminated as - * well; if invoked multiple times, a space is put between strings. + * Append a nil-terminated string to the buffer. + * This can be invoked multiple times. + * The buffer string will be nil-terminated. + * If invoked multiple times, a space is put between strings. */ static void -dbt_append(DBT *key, size_t *ksz, const char *cp) +buf_append(struct buf *buf, const char *cp) { size_t sz; if (0 == (sz = strlen(cp))) return; - assert(key->data); + if (buf->len) + buf->cp[(int)buf->len - 1] = ' '; - if (key->size) - ((char *)key->data)[(int)key->size - 1] = ' '; - - dbt_appendb(key, ksz, cp, sz + 1); + buf_appendb(buf, cp, sz + 1); } /* ARGSUSED */ static void pmdoc_An(MDOC_ARGS) { - uint32_t fl; if (SEC_AUTHORS != n->sec) return; for (n = n->child; n; n = n->next) if (MDOC_TEXT == n->type) - dbt_append(key, ksz, n->string); + buf_append(buf, n->string); - fl = (uint32_t)MANDOC_AUTHOR; - memcpy(val->data, &fl, 4); + hash_put(hash, buf, TYPE_AUTHOR); } /* ARGSUSED */ static void pmdoc_Fd(MDOC_ARGS) { - uint32_t fl; const char *start, *end; size_t sz; @@ -592,51 +562,46 @@ pmdoc_Fd(MDOC_ARGS) end--; assert(end >= start); - dbt_appendb(key, ksz, start, (size_t)(end - start + 1)); - dbt_appendb(key, ksz, "", 1); - fl = (uint32_t)MANDOC_INCLUDES; - memcpy(val->data, &fl, 4); + buf_appendb(buf, start, (size_t)(end - start + 1)); + buf_appendb(buf, "", 1); + + hash_put(hash, buf, TYPE_INCLUDES); } /* ARGSUSED */ static void pmdoc_Cd(MDOC_ARGS) { - uint32_t fl; if (SEC_SYNOPSIS != n->sec) return; for (n = n->child; n; n = n->next) if (MDOC_TEXT == n->type) - dbt_append(key, ksz, n->string); + buf_append(buf, n->string); - fl = (uint32_t)MANDOC_CONFIG; - memcpy(val->data, &fl, 4); + hash_put(hash, buf, TYPE_CONFIG); } /* ARGSUSED */ static void pmdoc_In(MDOC_ARGS) { - uint32_t fl; if (SEC_SYNOPSIS != n->sec) return; if (NULL == n->child || MDOC_TEXT != n->child->type) return; - dbt_append(key, ksz, n->child->string); - fl = (uint32_t)MANDOC_INCLUDES; - memcpy(val->data, &fl, 4); + buf_append(buf, n->child->string); + hash_put(hash, buf, TYPE_INCLUDES); } /* ARGSUSED */ static void pmdoc_Fn(MDOC_ARGS) { - uint32_t fl; const char *cp; if (SEC_SYNOPSIS != n->sec) @@ -655,32 +620,28 @@ pmdoc_Fn(MDOC_ARGS) while ('*' == *cp) cp++; - dbt_append(key, ksz, cp); - fl = (uint32_t)MANDOC_FUNCTION; - memcpy(val->data, &fl, 4); + buf_append(buf, cp); + hash_put(hash, buf, TYPE_FUNCTION); } /* ARGSUSED */ static void pmdoc_St(MDOC_ARGS) { - uint32_t fl; if (SEC_STANDARDS != n->sec) return; if (NULL == n->child || MDOC_TEXT != n->child->type) return; - dbt_append(key, ksz, n->child->string); - fl = (uint32_t)MANDOC_STANDARD; - memcpy(val->data, &fl, 4); + buf_append(buf, n->child->string); + hash_put(hash, buf, TYPE_STANDARD); } /* ARGSUSED */ static void pmdoc_Vt(MDOC_ARGS) { - uint32_t fl; const char *start; size_t sz; @@ -709,27 +670,23 @@ pmdoc_Vt(MDOC_ARGS) if (0 == sz) return; - dbt_appendb(key, ksz, start, sz); - dbt_appendb(key, ksz, "", 1); - - fl = (uint32_t)MANDOC_VARIABLE; - memcpy(val->data, &fl, 4); + buf_appendb(buf, start, sz); + buf_appendb(buf, "", 1); + hash_put(hash, buf, TYPE_VARIABLE); } /* ARGSUSED */ static void pmdoc_Fo(MDOC_ARGS) { - uint32_t fl; if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) return; if (NULL == n->child || MDOC_TEXT != n->child->type) return; - dbt_append(key, ksz, n->child->string); - fl = (uint32_t)MANDOC_FUNCTION; - memcpy(val->data, &fl, 4); + buf_append(buf, n->child->string); + hash_put(hash, buf, TYPE_FUNCTION); } @@ -738,56 +695,89 @@ static void pmdoc_Nd(MDOC_ARGS) { int first; + size_t sz; for (first = 1, n = n->child; n; n = n->next) { if (MDOC_TEXT != n->type) continue; - if (first) - dbt_appendb(rval, rsz, n->string, strlen(n->string) + 1); - else - dbt_append(rval, rsz, n->string); + + if (first) { + sz = strlen(n->string) + 1; + buf_appendb(dbuf, n->string, sz); + buf_appendb(buf, n->string, sz); + } else { + buf_append(dbuf, n->string); + buf_append(buf, n->string); + } + first = 0; } + + hash_put(hash, buf, TYPE_DESC); } /* ARGSUSED */ static void pmdoc_Nm(MDOC_ARGS) { - uint32_t fl; if (SEC_NAME == n->sec) { - for (n = n->child; n; n = n->next) { - if (MDOC_TEXT != n->type) - continue; - dbt_append(key, ksz, n->string); - } - fl = (uint32_t)MANDOC_NAME; - memcpy(val->data, &fl, 4); + for (n = n->child; n; n = n->next) + if (MDOC_TEXT == n->type) + buf_append(buf, n->string); + hash_put(hash, buf, TYPE_NAME); return; } else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) return; - for (n = n->child; n; n = n->next) { - if (MDOC_TEXT != n->type) - continue; - dbt_append(key, ksz, n->string); - } + if (NULL == n->child) + buf_append(buf, m->name); - fl = (uint32_t)MANDOC_UTILITY; - memcpy(val->data, &fl, 4); + for (n = n->child; n; n = n->next) + if (MDOC_TEXT == n->type) + buf_append(buf, n->string); + + hash_put(hash, buf, TYPE_UTILITY); } static void +hash_put(DB *db, const struct buf *buf, int mask) +{ + DBT key, val; + int rc; + + key.data = buf->cp; + + if ((key.size = buf->len) < 2) + return; + + if ((rc = (*db->get)(db, &key, &val, 0)) < 0) { + perror("hash"); + exit((int)MANDOCLEVEL_SYSERR); + } else if (0 == rc) + mask |= *(int *)val.data; + + val.data = &mask; + val.size = sizeof(int); + + /*fprintf(stderr, "Hashing: [%s] (0x%x)\n", + (char *)key.data, mask);*/ + + if ((rc = (*db->put)(db, &key, &val, 0)) < 0) { + perror("hash"); + exit((int)MANDOCLEVEL_SYSERR); + } +} + +static void dbt_put(DB *db, const char *dbn, DBT *key, DBT *val) { if (0 == key->size) return; - assert(key->data); + assert(key->size); assert(val->size); - assert(val->data); if (0 == (*db->put)(db, key, val, 0)) return; @@ -821,17 +811,15 @@ pmdoc_node(MDOC_ARGS) if (NULL == mdocs[n->tok]) break; - dbt_init(key, ksz); - - (*mdocs[n->tok])(db, dbn, key, ksz, val, rval, rsz, n); - dbt_put(db, dbn, key, val); + buf->len = 0; + (*mdocs[n->tok])(hash, buf, dbuf, n, m); break; default: break; } - pmdoc_node(db, dbn, key, ksz, val, rval, rsz, n->child); - pmdoc_node(db, dbn, key, ksz, val, rval, rsz, n->next); + pmdoc_node(hash, buf, dbuf, n->child, m); + pmdoc_node(hash, buf, dbuf, n->next, m); } static int @@ -840,7 +828,6 @@ pman_node(MAN_ARGS) const struct man_node *head, *body; const char *start, *sv; size_t sz; - uint32_t fl; if (NULL == n) return(0); @@ -863,9 +850,6 @@ pman_node(MAN_ARGS) NULL != (body = body->child) && MAN_TEXT == body->type) { - fl = (uint32_t)MANDOC_NAME; - memcpy(val->data, &fl, 4); - assert(body->string); start = sv = body->string; @@ -884,11 +868,11 @@ pman_node(MAN_ARGS) if ('\0' == start[(int)sz]) break; - dbt_init(key, ksz); - dbt_appendb(key, ksz, start, sz); - dbt_appendb(key, ksz, "", 1); + buf->len = 0; + buf_appendb(buf, start, sz); + buf_appendb(buf, "", 1); - dbt_put(db, dbn, key, val); + hash_put(hash, buf, TYPE_NAME); if (' ' == start[(int)sz]) { start += (int)sz + 1; @@ -902,8 +886,8 @@ pman_node(MAN_ARGS) } if (sv == start) { - dbt_init(key, ksz); - dbt_append(key, ksz, start); + buf->len = 0; + buf_append(buf, start); return(1); } @@ -922,41 +906,23 @@ pman_node(MAN_ARGS) while (' ' == *start) start++; - dbt_appendb(rval, rsz, start, strlen(start) + 1); + sz = strlen(start) + 1; + buf_appendb(dbuf, start, sz); + buf_appendb(buf, start, sz); } } - if (pman_node(db, dbn, key, ksz, val, rval, rsz, n->child)) + if (pman_node(hash, buf, dbuf, n->child)) return(1); - if (pman_node(db, dbn, key, ksz, val, rval, rsz, n->next)) + if (pman_node(hash, buf, dbuf, n->next)) return(1); return(0); } static void -pman(DB *db, const char *dbn, DBT *key, size_t *ksz, - DBT *val, DBT *rval, size_t *rsz, struct man *m) -{ - - pman_node(db, dbn, key, ksz, val, rval, rsz, man_node(m)); -} - - -static void -pmdoc(DB *db, const char *dbn, DBT *key, size_t *ksz, - DBT *val, DBT *rval, size_t *rsz, struct mdoc *m) -{ - - pmdoc_node(db, dbn, key, ksz, val, rval, rsz, mdoc_node(m)); -} - -static void usage(void) { - fprintf(stderr, "usage: %s " - "[-d path] " - "[file...]\n", - progname); + fprintf(stderr, "usage: %s [-d path] [file...]\n", progname); }