===================================================================
RCS file: /cvs/mandoc/mandocdb.c,v
retrieving revision 1.26
retrieving revision 1.34
diff -u -p -r1.26 -r1.34
--- mandoc/mandocdb.c	2011/12/08 01:00:58	1.26
+++ mandoc/mandocdb.c	2011/12/12 02:00:49	1.34
@@ -1,4 +1,4 @@
-/*	$Id: mandocdb.c,v 1.26 2011/12/08 01:00:58 kristaps Exp $ */
+/*	$Id: mandocdb.c,v 1.34 2011/12/12 02:00:49 schwarze Exp $ */
 /*
  * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -107,7 +107,8 @@ static	void		  index_merge(const struct of *, struct m
 				recno_t, const recno_t *, size_t);
 static	void		  index_prune(const struct of *, DB *, 
 				const char *, DB *, const char *, 
-				recno_t *, recno_t **, size_t *);
+				recno_t *, recno_t **, size_t *,
+				size_t *);
 static	void		  ofile_argbuild(int, char *[], struct of **);
 static	int		  ofile_dirbuild(const char *, const char *,
 				const char *, int, struct of **);
@@ -272,6 +273,7 @@ main(int argc, char *argv[])
 	struct manpaths	 dirs;
 	enum op		 op; /* current operation */
 	const char	*dir;
+	char		*conf_file;
 	char		*cp;
 	char		 pbuf[PATH_MAX],
 			 ibuf[MAXPATHLEN], /* index fname */
@@ -311,12 +313,16 @@ main(int argc, char *argv[])
 	maxrec = 0;
 	op = OP_NEW;
 	dir = NULL;
+	conf_file = NULL;
 
-	while (-1 != (ch = getopt(argc, argv, "ad:u:v")))
+	while (-1 != (ch = getopt(argc, argv, "aC:d:u:v")))
 		switch (ch) {
 		case ('a'):
 			use_all = 1;
 			break;
+		case ('C'):
+			conf_file = optarg;
+			break;
 		case ('d'):
 			dir = optarg;
 			op = OP_UPDATE;
@@ -390,7 +396,7 @@ main(int argc, char *argv[])
 		of = of->first;
 
 		index_prune(of, db, fbuf, idx, ibuf,
-				&maxrec, &recs, &recsz);
+				&maxrec, &recs, &recsz, &reccur);
 
 		/*
 		 * Go to the root of the respective manual tree
@@ -425,7 +431,7 @@ main(int argc, char *argv[])
 			dirs.paths[i] = mandoc_strdup(cp);
 		}
 	} else
-		manpath_parse(&dirs, NULL, NULL);
+		manpath_parse(&dirs, conf_file, NULL, NULL);
 
 	for (i = 0; i < dirs.sz; i++) {
 		ibuf[0] = fbuf[0] = '\0';
@@ -527,29 +533,15 @@ index_merge(const struct of *of, struct mparse *mp,
 		fn = of->fname;
 
 		/*
-		 * Reclaim an empty index record, if available.
+		 * Try interpreting the file as mdoc(7) or man(7)
+		 * source code, unless it is already known to be
+		 * formatted.  Fall back to formatted mode.
 		 */
 
-		if (reccur > 0) {
-			--reccur;
-			rec = recs[(int)reccur];
-		} else if (maxrec > 0) {
-			rec = maxrec;
-			maxrec = 0;
-		} else
-			rec++;
-
 		mparse_reset(mp);
-		hash_reset(&hash);
 		mdoc = NULL;
 		man = NULL;
 
-		/*
-		 * Try interpreting the file as mdoc(7) or man(7)
-		 * source code, unless it is already known to be
-		 * formatted.  Fall back to formatted mode.
-		 */
-
 		if ((MANDOC_SRC & of->src_form ||
 		    ! (MANDOC_FORM & of->src_form)) &&
 		    MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn))
@@ -578,14 +570,14 @@ index_merge(const struct of *of, struct mparse *mp,
 		if (0 == use_all) {
 			assert(of->sec);
 			assert(msec);
-			if (strcmp(msec, of->sec))
+			if (strcasecmp(msec, of->sec))
 				continue;
 
 			if (NULL == arch) {
 				if (NULL != of->arch)
 					continue;
 			} else if (NULL == of->arch ||
-					strcmp(arch, of->arch))
+					strcasecmp(arch, of->arch))
 				continue;
 		}
 
@@ -624,8 +616,12 @@ index_merge(const struct of *of, struct mparse *mp,
 
 		sv = dbuf->len;
 
-		/* Fix the record number in the btree value. */
+		/*
+		 * Collect keyword/mask pairs.
+		 * Each pair will become a new btree node.
+		 */
 
+		hash_reset(&hash);
 		if (mdoc)
 			pmdoc_node(hash, buf, dbuf,
 				mdoc_node(mdoc), mdoc_meta(mdoc));
@@ -635,11 +631,25 @@ index_merge(const struct of *of, struct mparse *mp,
 			pformatted(hash, buf, dbuf, of);
 
 		/*
-		 * Copy from the in-memory hashtable of pending keywords
-		 * into the database.
+		 * Reclaim an empty index record, if available.
+		 * Use its record number for all new btree nodes.
 		 */
 
+		if (reccur > 0) {
+			--reccur;
+			rec = recs[(int)reccur];
+		} else if (maxrec > 0) {
+			rec = maxrec;
+			maxrec = 0;
+		} else
+			rec++;
 		vbuf.rec = htobe32(rec);
+
+		/*
+		 * Copy from the in-memory hashtable of pending
+		 * keyword/mask pairs into the database.
+		 */
+
 		seq = R_FIRST;
 		while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
 			seq = R_NEXT;
@@ -682,18 +692,17 @@ index_merge(const struct of *of, struct mparse *mp,
  */
 static void
 index_prune(const struct of *ofile, DB *db, const char *dbf, 
-		DB *idx, const char *idxf,
-		recno_t *maxrec, recno_t **recs, size_t *recsz)
+		DB *idx, const char *idxf, recno_t *maxrec,
+		recno_t **recs, size_t *recsz, size_t *reccur)
 {
 	const struct of	*of;
 	const char	*fn, *cp;
 	struct db_val	*vbuf;
 	unsigned	 seq, sseq;
 	DBT		 key, val;
-	size_t		 reccur;
 	int		 ch;
 
-	reccur = 0;
+	*reccur = 0;
 	seq = R_FIRST;
 	while (0 == (ch = (*idx->seq)(idx, &key, &val, seq))) {
 		seq = R_NEXT;
@@ -767,14 +776,14 @@ index_prune(const struct of *ofile, DB *db, const char
 		if (ch < 0)
 			break;
 cont:
-		if (reccur >= *recsz) {
+		if (*reccur >= *recsz) {
 			*recsz += MANDOC_SLOP;
 			*recs = mandoc_realloc
 				(*recs, *recsz * sizeof(recno_t));
 		}
 
-		(*recs)[(int)reccur] = *maxrec;
-		reccur++;
+		(*recs)[(int)*reccur] = *maxrec;
+		(*reccur)++;
 	}
 
 	if (ch < 0) {
@@ -1288,52 +1297,72 @@ pformatted(DB *hash, struct buf *buf, struct buf *dbuf
 	buf_append(buf, of->title);
 	hash_put(hash, buf, TYPE_Nm);
 
-	while (NULL != (line = fgetln(stream, &len)) && '\n' != *line)
-		/* Skip to first blank line. */ ;
+	/* Skip to first blank line. */
 
-	while (NULL != (line = fgetln(stream, &len)) &&
-			('\n' == *line || ' ' == *line))
-		/* Skip to first section header. */ ;
+	while (NULL != (line = fgetln(stream, &len)))
+		if ('\n' == *line)
+			break;
 
 	/*
-	 * If no page content can be found,
-	 * reuse the page title as the page description.
+	 * Assume the first line that is not indented
+	 * is the first section header.  Skip to it.
 	 */
 
-	if (NULL == (line = fgetln(stream, &len))) {
+	while (NULL != (line = fgetln(stream, &len)))
+		if ('\n' != *line && ' ' != *line)
+			break;
+
+	/*
+	 * If no page content can be found, or the input line
+	 * is already the next section header, or there is no
+	 * trailing newline, reuse the page title as the page
+	 * description.
+	 */
+
+	line = fgetln(stream, &len);
+	if (NULL == line || ' ' != *line || '\n' != line[(int)len - 1]) {
 		buf_appendb(dbuf, buf->cp, buf->size);
 		hash_put(hash, buf, TYPE_Nd);
 		fclose(stream);
 		return;
 	}
-	fclose(stream);
 
+	line[(int)--len] = '\0';
+
 	/*
-	 * If there is a dash, skip to the text following it.
+	 * Skip to the first dash.
+	 * Use the remaining line as the description (no more than 70
+	 * bytes).
 	 */
 
-	for (p = line, plen = len; plen; p++, plen--)
-		if ('-' == *p)
-			break;
-	for ( ; plen; p++, plen--)
-		if ('-' != *p && ' ' != *p && 8 != *p)
-			break;
-	if (0 == plen) {
+	if (NULL != (p = strstr(line, "- "))) {
+		for (p += 2; ' ' == *p || '\b' == *p; p++)
+			/* Skip to next word. */ ;
+	} else
 		p = line;
-		plen = len;
+
+	if ((plen = strlen(p)) > 70) {
+		plen = 70;
+		p[plen] = '\0';
 	}
 
-	/*
-	 * Copy the rest of the line, but no more than 70 bytes.
-	 */
+	/* Strip backspace-encoding from line. */
 
-	if (70 < plen)
-		plen = 70;
-	p[plen-1] = '\0';
-	buf_appendb(dbuf, p, plen);
+	while (NULL != (line = memchr(p, '\b', plen))) {
+		len = line - p;
+		if (0 == len) {
+			memmove(line, line + 1, plen--);
+			continue;
+		} 
+		memmove(line - 1, line + 1, plen - len);
+		plen -= 2;
+	}
+
+	buf_appendb(dbuf, p, plen + 1);
 	buf->len = 0;
-	buf_appendb(buf, p, plen);
+	buf_appendb(buf, p, plen + 1);
 	hash_put(hash, buf, TYPE_Nd);
+	fclose(stream);
 }
 
 static void
@@ -1539,6 +1568,10 @@ ofile_dirbuild(const char *dir, const char* psec, cons
 			buf[0] = '\0';
 			strlcat(buf, dir, MAXPATHLEN);
 			p = strrchr(buf, '/');
+			if (NULL != parch && NULL != p)
+				for (p--; p > buf; p--)
+					if ('/' == *p)
+						break;
 			if (NULL == p)
 				p = buf;
 			else
@@ -1632,7 +1665,8 @@ usage(void)
 {
 
 	fprintf(stderr, "usage: %s [-v] "
-			"[-d dir [files...] |"
-			" -u dir [files...] |"
-			" dir...]\n", progname);
+			"[-C file] |"
+			" dir ... |"
+			" -d dir [file ...] |"
+			" -u dir [file ...]\n", progname);
 }