===================================================================
RCS file: /cvs/mandoc/mandocdb.c,v
retrieving revision 1.38
retrieving revision 1.43
diff -u -p -r1.38 -r1.43
--- mandoc/mandocdb.c	2011/12/25 13:08:12	1.38
+++ mandoc/mandocdb.c	2011/12/31 18:47:52	1.43
@@ -1,4 +1,4 @@
-/*	$Id: mandocdb.c,v 1.38 2011/12/25 13:08:12 schwarze Exp $ */
+/*	$Id: mandocdb.c,v 1.43 2011/12/31 18:47:52 kristaps Exp $ */
 /*
  * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -23,6 +23,7 @@
 #include <sys/types.h>
 
 #include <assert.h>
+#include <ctype.h>
 #include <dirent.h>
 #include <fcntl.h>
 #include <getopt.h>
@@ -129,8 +130,8 @@ static	void		  ofile_argbuild(int, char *[], struct of
 static	void		  ofile_dirbuild(const char *, const char *,
 				const char *, int, struct of **);
 static	void		  ofile_free(struct of *);
-static	void		  pformatted(DB *, struct buf *, struct buf *,
-				const struct of *);
+static	void		  pformatted(DB *, struct buf *, 
+				struct buf *, const struct of *);
 static	int		  pman_node(MAN_ARGS);
 static	void		  pmdoc_node(MDOC_ARGS);
 static	int		  pmdoc_head(MDOC_ARGS);
@@ -575,7 +576,7 @@ index_merge(const struct of *of, struct mparse *mp,
 	uint64_t	 mask;
 	size_t		 sv;
 	unsigned	 seq;
-	struct db_val	 vbuf;
+	uint64_t	 vbuf[2];
 	char		 type;
 
 	rec = 0;
@@ -615,8 +616,8 @@ index_merge(const struct of *of, struct mparse *mp,
 
 		/*
 		 * By default, skip a file if the manual section
-		 * and architecture given in the file disagree
-		 * with the directory where the file is located.
+		 * given in the file disagrees with the directory
+		 * where the file is located.
 		 */
 
 		skip = 0;
@@ -631,6 +632,21 @@ index_merge(const struct of *of, struct mparse *mp,
 			skip = 1;
 		}
 
+		/*
+		 * Manual page directories exist for each kernel
+		 * architecture as returned by machine(1).
+		 * However, many manuals only depend on the
+		 * application architecture as returned by arch(1).
+		 * For example, some (2/ARM) manuals are shared
+		 * across the "armish" and "zaurus" kernel
+		 * architectures.
+		 * A few manuals are even shared across completely
+		 * different architectures, for example fdformat(1)
+		 * on amd64, i386, sparc, and sparc64.
+		 * Thus, warn about architecture mismatches,
+		 * but don't skip manuals for this reason.
+		 */
+
 		assert(of->arch);
 		assert(march);
 		if (strcasecmp(march, of->arch)) {
@@ -639,7 +655,7 @@ index_merge(const struct of *of, struct mparse *mp,
 					"architecture \"%s\" manual "
 					"in \"%s\" directory\n",
 					fn, march, of->arch);
-			skip = 1;
+			march = of->arch;
 		}
 
 		/*
@@ -714,7 +730,7 @@ index_merge(const struct of *of, struct mparse *mp,
 			recs->last = 0;
 		} else
 			rec++;
-		vbuf.rec = htobe32(rec);
+		vbuf[1] = htobe64(rec);
 
 		/*
 		 * Copy from the in-memory hashtable of pending
@@ -726,8 +742,8 @@ index_merge(const struct of *of, struct mparse *mp,
 			seq = R_NEXT;
 			assert(sizeof(uint64_t) == val.size);
 			memcpy(&mask, val.data, val.size);
-			vbuf.mask = htobe64(mask);
-			val.size = sizeof(struct db_val);
+			vbuf[0] = htobe64(mask);
+			val.size = sizeof(vbuf);
 			val.data = &vbuf;
 			dbt_put(mdb->db, mdb->dbn, &key, &val);
 		}
@@ -768,7 +784,7 @@ index_prune(const struct of *ofile, struct mdb *mdb, s
 {
 	const struct of	*of;
 	const char	*fn;
-	struct db_val	*vbuf;
+	uint64_t	 vbuf[2];
 	unsigned	 seq, sseq;
 	DBT		 key, val;
 	int		 ch;
@@ -817,11 +833,11 @@ index_prune(const struct of *ofile, struct mdb *mdb, s
 		while (0 == (ch = (*mdb->db->seq)(mdb->db,
 					&key, &val, sseq))) {
 			sseq = R_NEXT;
-			if (sizeof(struct db_val) != val.size)
+			if (sizeof(vbuf) != val.size)
 				break;
 
-			vbuf = val.data;
-			if (recs->last != betoh32(vbuf->rec))
+			memcpy(vbuf, val.data, val.size);
+			if (recs->last != betoh64(vbuf[1]))
 				continue;
 
 			if ((ch = (*mdb->db->del)(mdb->db,
@@ -1319,6 +1335,8 @@ pman_node(MAN_ARGS)
 
 			if (0 == strncmp(start, "-", 1))
 				start += 1;
+			else if (0 == strncmp(start, "\\-\\-", 4))
+				start += 4;
 			else if (0 == strncmp(start, "\\-", 2))
 				start += 2;
 			else if (0 == strncmp(start, "\\(en", 4))
@@ -1349,12 +1367,12 @@ pman_node(MAN_ARGS)
  * By necessity, this involves rather crude guesswork.
  */
 static void
-pformatted(DB *hash, struct buf *buf, struct buf *dbuf,
-		 const struct of *of)
+pformatted(DB *hash, struct buf *buf, 
+		struct buf *dbuf, const struct of *of)
 {
 	FILE		*stream;
-	char		*line, *p;
-	size_t		 len, plen;
+	char		*line, *p, *title;
+	size_t		 len, plen, titlesz;
 
 	if (NULL == (stream = fopen(of->fname, "r"))) {
 		if (warnings)
@@ -1387,7 +1405,33 @@ pformatted(DB *hash, struct buf *buf, struct buf *dbuf
 	while (NULL != (line = fgetln(stream, &len)))
 		if ('\n' != *line && ' ' != *line)
 			break;
+	
+	/*
+	 * Read up until the next section into a buffer.
+	 * Strip leading whitespace from each line read and replace
+	 * its trailing newline with a single space.
+	 * Ignore empty (whitespace-only) lines.
+	 */
 
+	titlesz = 0;
+	title = NULL;
+
+	while (NULL != (line = fgetln(stream, &len))) {
+		if (' ' != *line || '\n' != line[(int)len - 1])
+			break;
+		while (len > 0 && isspace((unsigned char)*line)) {
+			line++;
+			len--;
+		}
+		/* A blank line is consumed whole, newline included. */
+		if (0 == len)
+			continue;
+		title = mandoc_realloc(title, titlesz + len);
+		memcpy(title + titlesz, line, len);
+		titlesz += len;
+		title[(int)titlesz - 1] = ' ';
+	}
+
 	/*
 	 * If no page content can be found, or the input line
 	 * is already the next section header, or there is no
@@ -1395,18 +1439,19 @@ pformatted(DB *hash, struct buf *buf, struct buf *dbuf
 	 * description.
 	 */
 
-	line = fgetln(stream, &len);
-	if (NULL == line || ' ' != *line || '\n' != line[(int)len - 1]) {
+	if (NULL == title || '\0' == *title) {
 		if (warnings)
 			fprintf(stderr, "%s: cannot find NAME section\n",
 					of->fname);
 		buf_appendb(dbuf, buf->cp, buf->size);
 		hash_put(hash, buf, TYPE_Nd);
 		fclose(stream);
+		free(title);
 		return;
 	}
 
-	line[(int)--len] = '\0';
+	title = mandoc_realloc(title, titlesz + 1);
+	title[(int)titlesz] = '\0';
 
 	/*
 	 * Skip to the first dash.
@@ -1414,20 +1459,17 @@ pformatted(DB *hash, struct buf *buf, struct buf *dbuf
 	 * bytes).
 	 */
 
-	if (NULL != (p = strstr(line, "- "))) {
+	if (NULL != (p = strstr(title, "- "))) {
 		for (p += 2; ' ' == *p || '\b' == *p; p++)
 			/* Skip to next word. */ ;
 	} else {
 		if (warnings)
 			fprintf(stderr, "%s: no dash in title line\n",
 					of->fname);
-		p = line;
+		p = title;
 	}
 
-	if ((plen = strlen(p)) > 70) {
-		plen = 70;
-		p[plen] = '\0';
-	}
+	plen = strlen(p);
 
 	/* Strip backspace-encoding from line. */
 
@@ -1446,13 +1488,15 @@ pformatted(DB *hash, struct buf *buf, struct buf *dbuf
 	buf_appendb(buf, p, plen + 1);
 	hash_put(hash, buf, TYPE_Nd);
 	fclose(stream);
+	free(title);
 }
 
 static void
 ofile_argbuild(int argc, char *argv[], struct of **of)
 {
 	char		 buf[MAXPATHLEN];
-	char		*sec, *arch, *title, *p;
+	const char	*sec, *arch, *title;
+	char		*p;
 	int		 i, src_form;
 	struct of	*nof;
 
@@ -1538,7 +1582,7 @@ ofile_argbuild(int argc, char *argv[], struct of **of)
  * Recursively build up a list of files to parse.
  * We use this instead of ftw() and so on because I don't want global
  * variables hanging around.
- * This ignores the mandoc.db and mandoc.index files, but assumes that
+ * This ignores the whatis.db and whatis.index files, but assumes that
  * everything else is a manual.
  * Pass in a pointer to a NULL structure for the first invocation.
  */
@@ -1756,6 +1800,7 @@ ofile_dirbuild(const char *dir, const char* psec, cons
 
 		if (verb > 1)
 			printf("%s: scheduling\n", buf);
+
 		if (NULL == *of) {
 			*of = nof;
 			(*of)->first = nof;
@@ -1774,7 +1819,10 @@ ofile_free(struct of *of)
 {
 	struct of	*nof;
 
-	while (of) {
+	if (NULL != of)
+		of = of->first;
+
+	while (NULL != of) {
 		nof = of->next;
 		free(of->fname);
 		free(of->sec);