=================================================================== RCS file: /cvs/mandoc/cgi.c,v retrieving revision 1.22 retrieving revision 1.31 diff -u -p -r1.22 -r1.31 --- mandoc/cgi.c 2011/12/10 00:06:34 1.22 +++ mandoc/cgi.c 2011/12/14 13:36:59 1.31 @@ -1,4 +1,4 @@ -/* $Id: cgi.c,v 1.22 2011/12/10 00:06:34 kristaps Exp $ */ +/* $Id: cgi.c,v 1.31 2011/12/14 13:36:59 kristaps Exp $ */ /* * Copyright (c) 2011 Kristaps Dzonsons * @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -54,26 +55,27 @@ enum page { PAGE__MAX }; +struct paths { + char *name; + char *path; +}; + /* * A query as passed to the search function. - * See kval_query() on how this is parsed. */ struct query { const char *arch; /* architecture */ const char *sec; /* manual section */ const char *expr; /* unparsed expression string */ + int manroot; /* manroot index (or -1)*/ int whatis; /* whether whatis mode */ int legacy; /* whether legacy mode */ }; -struct kval { - char *key; - char *val; -}; - struct req { - struct kval *fields; - size_t fieldsz; + struct query q; + struct paths *p; + size_t psz; enum page page; }; @@ -83,17 +85,13 @@ static int cmp(const void *, const void *); static void format(const char *); static void html_print(const char *); static void html_putchar(char); -static int kval_decode(char *); -static void kval_free(struct kval *, size_t); -static void kval_parse(struct kval **, size_t *, char *); -static void kval_query(struct query *, - const struct kval *, size_t); -static void pg_index(const struct manpaths *, - const struct req *, char *); -static void pg_search(const struct manpaths *, - const struct req *, char *); -static void pg_show(const struct manpaths *, - const struct req *, char *); +static int http_decode(char *); +static void http_parse(struct req *, char *); +static int pathstop(DIR *); +static void pathgen(DIR *, char *, struct req *); +static void pg_index(const struct req *, char *); +static void pg_search(const struct req *, char *); +static void pg_show(const struct req *, char *); static void resp_bad(void); static void resp_baddb(void); static void resp_error400(void); @@ -105,9 +103,10 @@ static void resp_index(const struct req *); static void resp_search(struct res *, size_t, void *); static void resp_searchform(const struct req *); -static const char *progname; -static const char *cache; -static const char *host; +static const char *progname; /* cgi script name */ +static const char *cache; /* cache directory */ +static const char *css; /* css directory */ +static const char *host; /* hostname */ static const char * const pages[PAGE__MAX] = { "index", /* PAGE_INDEX */ @@ -116,56 +115,6 @@ static const char * const pages[PAGE__MAX] = { }; /* - * Initialise and parse a query structure from input. - * This accomodates for mdocml's man.cgi and also for legacy man.cgi - * input keys ("sektion" and "apropos"). - * Note that legacy mode has some quirks: if apropos legacy mode is - * detected, we unset the section and architecture string. - */ -static void -kval_query(struct query *q, const struct kval *fields, size_t sz) -{ - int i, legacy; - - memset(q, 0, sizeof(struct query)); - q->whatis = 1; - legacy = -1; - - for (i = 0; i < (int)sz; i++) - if (0 == strcmp(fields[i].key, "expr")) - q->expr = fields[i].val; - else if (0 == strcmp(fields[i].key, "query")) - q->expr = fields[i].val; - else if (0 == strcmp(fields[i].key, "sec")) - q->sec = fields[i].val; - else if (0 == strcmp(fields[i].key, "sektion")) - q->sec = fields[i].val; - else if (0 == strcmp(fields[i].key, "arch")) - q->arch = fields[i].val; - else if (0 == strcmp(fields[i].key, "apropos")) - legacy = 0 == strcmp - (fields[i].val, "0"); - else if (0 == strcmp(fields[i].key, "op")) - q->whatis = 0 == strcasecmp - (fields[i].val, "whatis"); - - /* Test for old man.cgi compatibility mode. */ - - if (legacy == 0) { - q->whatis = 0; - q->legacy = 1; - } else if (legacy > 0) { - q->legacy = 1; - q->whatis = 1; - } - - /* Section "0" means no section when in legacy mode. */ - - if (q->legacy && NULL != q->sec && 0 == strcmp(q->sec, "0")) - q->sec = NULL; -} - -/* * This is just OpenBSD's strtol(3) suggestion. * I use it instead of strtonum(3) for portability's sake. */ @@ -229,31 +178,24 @@ html_print(const char *p) html_putchar(*p++); } -static void -kval_free(struct kval *p, size_t sz) -{ - int i; - - for (i = 0; i < (int)sz; i++) { - free(p[i].key); - free(p[i].val); - } - free(p); -} - /* * Parse out key-value pairs from an HTTP request variable. * This can be either a cookie or a POST/GET string, although man.cgi * uses only GET for simplicity. */ static void -kval_parse(struct kval **kv, size_t *kvsz, char *p) +http_parse(struct req *req, char *p) { - char *key, *val; - size_t sz, cur; + char *key, *val, *manroot; + size_t sz; + int i, legacy; - cur = 0; + memset(&req->q, 0, sizeof(struct query)); + req->q.whatis = 1; + legacy = -1; + manroot = NULL; + while (p && '\0' != *p) { while (' ' == *p) p++; @@ -287,21 +229,59 @@ kval_parse(struct kval **kv, size_t *kvsz, char *p) /* Just abort handling. */ - if ( ! kval_decode(key)) - return; - if ( ! kval_decode(val)) - return; + if ( ! http_decode(key)) + break; + if ( ! http_decode(val)) + break; - if (*kvsz + 1 >= cur) { - cur++; - *kv = mandoc_realloc - (*kv, cur * sizeof(struct kval)); - } + if (0 == strcmp(key, "expr")) + req->q.expr = val; + else if (0 == strcmp(key, "query")) + req->q.expr = val; + else if (0 == strcmp(key, "sec")) + req->q.sec = val; + else if (0 == strcmp(key, "sektion")) + req->q.sec = val; + else if (0 == strcmp(key, "arch")) + req->q.arch = val; + else if (0 == strcmp(key, "manpath")) + manroot = val; + else if (0 == strcmp(key, "apropos")) + legacy = 0 == strcmp(val, "0"); + else if (0 == strcmp(key, "op")) + req->q.whatis = 0 == strcasecmp(val, "whatis"); + } - (*kv)[(int)*kvsz].key = mandoc_strdup(key); - (*kv)[(int)*kvsz].val = mandoc_strdup(val); - (*kvsz)++; + /* Test for old man.cgi compatibility mode. */ + + if (legacy == 0) { + req->q.whatis = 0; + req->q.legacy = 1; + } else if (legacy > 0) { + req->q.legacy = 1; + req->q.whatis = 1; } + + /* + * Section "0" means no section when in legacy mode. + * For some man.cgi scripts, "default" arch is none. + */ + + if (req->q.legacy && NULL != req->q.sec) + if (0 == strcmp(req->q.sec, "0")) + req->q.sec = NULL; + if (req->q.legacy && NULL != req->q.arch) + if (0 == strcmp(req->q.arch, "default")) + req->q.arch = NULL; + + /* Default to first manroot. */ + + if (NULL != manroot) { + for (i = 0; i < (int)req->psz; i++) + if (0 == strcmp(req->p[i].name, manroot)) + break; + req->q.manroot = i < (int)req->psz ? i : -1; + } } /* @@ -310,7 +290,7 @@ kval_parse(struct kval **kv, size_t *kvsz, char *p) * over the allocated string. */ static int -kval_decode(char *p) +http_decode(char *p) { char hex[3]; int c; @@ -359,19 +339,19 @@ resp_begin_html(int code, const char *msg) resp_begin_http(code, msg); - puts("\n" - "\n" - "\n" - "\n" - "\n" - "System Manpage Reference\n" - "\n" - "\n" - ""); + printf("\n" + "\n" + "\n" + "\n" + "\n" + "System Manpage Reference\n" + "\n" + "\n" + "\n", css); } static void @@ -385,31 +365,43 @@ resp_end_html(void) static void resp_searchform(const struct req *req) { - struct query q; + int i; - kval_query(&q, req->fields, req->fieldsz); - puts(""); - printf("
\n"); - printf("
\n" + printf("\n" + "
\n" "Search Parameters\n" " or \n" " for manuals satisfying \n" - "q.expr ? req->q.expr : ""); printf("\">, section " "q.sec ? req->q.sec : ""); printf("\">, arch " ".\n" + html_print(req->q.arch ? req->q.arch : ""); + printf("\">"); + if (req->psz > 1) { + puts(", "); + } + puts(".\n" "\n" "
\n" ""); @@ -478,17 +470,19 @@ static void resp_search(struct res *r, size_t sz, void *arg) { int i; - struct query q; const struct req *req; + req = (const struct req *)arg; + assert(req->q.manroot >= 0); + if (1 == sz) { /* * If we have just one result, then jump there now * without any delay. */ puts("Status: 303 See Other"); - printf("Location: http://%s%s/show/%u/%u.html\n", - host, progname, + printf("Location: http://%s%s/show/%d/%u/%u.html\n", + host, progname, req->q.manroot, r[0].volume, r[0].rec); puts("Content-Type: text/html; charset=utf-8\n"); return; @@ -497,24 +491,20 @@ resp_search(struct res *r, size_t sz, void *arg) qsort(r, sz, sizeof(struct res), cmp); resp_begin_html(200, NULL); - - req = (const struct req *)arg; resp_searchform(req); - kval_query(&q, req->fields, req->fieldsz); if (0 == sz) { printf("

\n" - "No %s results found.", - q.whatis ? "whatis" : "apropos"); - if (q.whatis) { - printf("(Try q.whatis ? "whatis" : "apropos"); + if (req->q.whatis) { + printf("(Try q.expr ? req->q.expr : ""); printf("&sec="); - html_print(q.sec ? q.sec : ""); + html_print(req->q.sec ? req->q.sec : ""); printf("&arch="); - html_print(q.arch ? q.arch : ""); + html_print(req->q.arch ? req->q.arch : ""); puts("\">apropos?)"); } puts("

"); @@ -528,9 +518,9 @@ resp_search(struct res *r, size_t sz, void *arg) for (i = 0; i < (int)sz; i++) { printf("\n" "\n" - "", r[i].volume, r[i].rec); + "", + progname, req->q.manroot, + r[i].volume, r[i].rec); html_print(r[i].title); putchar('('); html_print(r[i].cat); @@ -552,7 +542,7 @@ resp_search(struct res *r, size_t sz, void *arg) /* ARGSUSED */ static void -pg_index(const struct manpaths *ps, const struct req *req, char *path) +pg_index(const struct req *req, char *path) { resp_index(req); @@ -573,20 +563,20 @@ catman(const char *file) } resp_begin_http(200, NULL); - puts("\n" - "\n" - "\n" - "\n" - "\n" - "System Manpage Reference\n" - "\n" - "\n" - "\n" - "
");
+	printf("\n"
+	       "\n"
+	       "\n"
+	       "\n"
+	       "\n"
+	       "System Manpage Reference\n"
+	       "\n"
+	       "\n"
+	       "\n"
+	       "
\n", css);
 
 	while (NULL != (p = fgetln(f, &len))) {
 		bold = italic = 0;
@@ -729,10 +719,10 @@ format(const char *file)
 		return;
 	}
 
-	snprintf(opts, sizeof(opts), "style=/man.css,"
+	snprintf(opts, sizeof(opts), "style=%s/man.css,"
 			"man=%s/search.html?sec=%%S&expr=%%N,"
 			/*"includes=/cgi-bin/man.cgi/usr/include/%%I"*/,
-			progname);
+			css, progname);
 
 	mparse_result(mp, &mdoc, &man);
 	vp = html_alloc(opts);
@@ -751,42 +741,74 @@ format(const char *file)
 }
 
 static void
-pg_show(const struct manpaths *ps, const struct req *req, char *path)
+pg_show(const struct req *req, char *path)
 {
+	struct manpaths	 ps;
 	char		*sub;
 	char		 file[MAXPATHLEN];
 	const char	*fn, *cp;
 	int		 rc;
-	unsigned int	 vol, rec;
+	unsigned int	 vol, rec, mr;
 	DB		*idx;
 	DBT		 key, val;
 
-	if (NULL == path) {
+	idx = NULL;
+
+	/* Parse out mroot, volume, and record from the path. */
+
+	if (NULL == path || NULL == (sub = strchr(path, '/'))) {
 		resp_error400();
 		return;
-	} else if (NULL == (sub = strrchr(path, '/'))) {
+	} 
+	*sub++ = '\0';
+	if ( ! atou(path, &mr)) {
 		resp_error400();
 		return;
-	} else
-		*sub++ = '\0';
-
-	if ( ! (atou(path, &vol) && atou(sub, &rec))) {
+	}
+	path = sub;
+	if (NULL == (sub = strchr(path, '/'))) {
 		resp_error400();
 		return;
-	} else if (vol >= (unsigned int)ps->sz) {
+	}
+	*sub++ = '\0';
+	if ( ! atou(path, &vol) || ! atou(sub, &rec)) {
 		resp_error400();
 		return;
+	} else if (mr >= (unsigned int)req->psz) {
+		resp_error400();
+		return;
 	}
 
-	strlcpy(file, ps->paths[vol], MAXPATHLEN);
+	/*
+	 * Begin by chdir()ing into the manroot.
+	 * This way we can pick up the database files, which are
+	 * relative to the manpath root.
+	 */
+
+	if (-1 == chdir(req->p[(int)mr].path)) {
+		perror(req->p[(int)mr].path);
+		resp_baddb();
+		return;
+	}
+
+	memset(&ps, 0, sizeof(struct manpaths));
+	manpath_manconf(&ps, "etc/catman.conf");
+
+	if (vol >= (unsigned int)ps.sz) {
+		resp_error400();
+		goto out;
+	}
+
+	strlcpy(file, ps.paths[vol], MAXPATHLEN);
 	strlcat(file, "/mandoc.index", MAXPATHLEN);
 
 	/* Open the index recno(3) database. */
 
 	idx = dbopen(file, O_RDONLY, 0, DB_RECNO, NULL);
 	if (NULL == idx) {
+		perror(file);
 		resp_baddb();
-		return;
+		goto out;
 	}
 
 	key.data = &rec;
@@ -806,42 +828,60 @@ pg_show(const struct manpaths *ps, const struct req *r
 	else if (NULL == memchr(fn, '\0', val.size - (fn - cp)))
 		resp_baddb();
 	else {
-		strlcpy(file, cache, MAXPATHLEN);
-		strlcat(file, "/", MAXPATHLEN);
-		strlcat(file, fn, MAXPATHLEN);
 		if (0 == strcmp(cp, "cat"))
-			catman(file);
+			catman(fn + 1);
 		else
-			format(file);
+			format(fn + 1);
 	}
 out:
-	(*idx->close)(idx);
+	if (idx)
+		(*idx->close)(idx);
+	manpath_free(&ps);
 }
 
 static void
-pg_search(const struct manpaths *ps, const struct req *req, char *path)
+pg_search(const struct req *req, char *path)
 {
 	size_t		  tt;
+	struct manpaths	  ps;
 	int		  i, sz, rc;
 	const char	 *ep, *start;
 	char		**cp;
 	struct opts	  opt;
 	struct expr	 *expr;
-	struct query	  q;
 
-	kval_query(&q, req->fields, req->fieldsz);
+	if (req->q.manroot < 0 || 0 == req->psz) {
+		resp_search(NULL, 0, (void *)req);
+		return;
+	}
+
 	memset(&opt, 0, sizeof(struct opts));
 
-	ep 	 = q.expr;
-	opt.arch = q.arch;
-	opt.cat  = q.sec;
+	ep 	 = req->q.expr;
+	opt.arch = req->q.arch;
+	opt.cat  = req->q.sec;
 	rc 	 = -1;
 	sz 	 = 0;
 	cp	 = NULL;
 
 	/*
-	 * Poor man's tokenisation.
-	 * Just break apart by spaces.
+	 * Begin by chdir()ing into the root of the manpath.
+	 * This way we can pick up the database files, which are
+	 * relative to the manpath root.
+	 */
+
+	assert(req->q.manroot < (int)req->psz);
+	if (-1 == (chdir(req->p[req->q.manroot].path))) {
+		perror(req->p[req->q.manroot].path);
+		resp_search(NULL, 0, (void *)req);
+		return;
+	}
+
+	memset(&ps, 0, sizeof(struct manpaths));
+	manpath_manconf(&ps, "etc/catman.conf");
+
+	/*
+	 * Poor man's tokenisation: just break apart by spaces.
 	 * Yes, this is half-ass.  But it works for now.
 	 */
 
@@ -865,12 +905,12 @@ pg_search(const struct manpaths *ps, const struct req 
 	 * The resp_search() function is called with the results.
 	 */
 
-	expr = q.whatis ? termcomp(sz, cp, &tt) :
-		          exprcomp(sz, cp, &tt);
+	expr = req->q.whatis ? 
+		termcomp(sz, cp, &tt) : exprcomp(sz, cp, &tt);
 
 	if (NULL != expr)
 		rc = apropos_search
-			(ps->sz, ps->paths, &opt,
+			(ps.sz, ps.paths, &opt,
 			 expr, tt, (void *)req, resp_search);
 
 	/* ...unless errors occured. */
@@ -885,41 +925,65 @@ pg_search(const struct manpaths *ps, const struct req 
 
 	free(cp);
 	exprfree(expr);
+	manpath_free(&ps);
 }
 
 int
 main(void)
 {
 	int		 i;
+	char		 buf[MAXPATHLEN];
+	DIR		*cwd;
 	struct req	 req;
 	char		*p, *path, *subpath;
-	struct manpaths	 paths;
 
-	/* HTTP init: read and parse the query string. */
+	/* Scan our run-time environment. */
 
-	progname = getenv("SCRIPT_NAME");
-	if (NULL == progname)
+	if (NULL == (cache = getenv("CACHE_DIR")))
+		cache = "/cache/man.cgi";
+
+	if (NULL == (progname = getenv("SCRIPT_NAME")))
 		progname = "";
 
-	cache = getenv("CACHE_DIR");
-	if (NULL == cache)
-		cache = "/cache/man.cgi";
+	if (NULL == (css = getenv("CSS_DIR")))
+		css = "";
 
+	if (NULL == (host = getenv("HTTP_HOST")))
+		host = "localhost";
+
+	/*
+	 * First we change directory into the cache directory so that
+	 * subsequent scanning for manpath directories is rooted
+	 * relative to the same position.
+	 */
+
 	if (-1 == chdir(cache)) {
+		perror(cache);
 		resp_bad();
 		return(EXIT_FAILURE);
-	}
+	} else if (NULL == (cwd = opendir(cache))) {
+		perror(cache);
+		resp_bad();
+		return(EXIT_FAILURE);
+	} 
 
-	host = getenv("HTTP_HOST");
-	if (NULL == host)
-		host = "localhost";
-
 	memset(&req, 0, sizeof(struct req));
 
+	strlcpy(buf, ".", MAXPATHLEN);
+	pathgen(cwd, buf, &req);
+	closedir(cwd);
+
+	/* Next parse out the query string. */
+
 	if (NULL != (p = getenv("QUERY_STRING")))
-		kval_parse(&req.fields, &req.fieldsz, p);
+		http_parse(&req, p);
 
-	/* Resolve leading subpath component. */
+	/*
+	 * Now juggle paths to extract information.
+	 * We want to extract our filetype (the file suffix), the
+	 * initial path component, then the trailing component(s).
+	 * Start with leading subpath component. 
+	 */
 
 	subpath = path = NULL;
 	req.page = PAGE__MAX;
@@ -950,31 +1014,29 @@ main(void)
 				break;
 			}
 
-	/* Initialise MANPATH. */
-
-	memset(&paths, 0, sizeof(struct manpaths));
-	manpath_manconf("etc/catman.conf", &paths);
-
 	/* Route pages. */
 
 	switch (req.page) {
 	case (PAGE_INDEX):
-		pg_index(&paths, &req, subpath);
+		pg_index(&req, subpath);
 		break;
 	case (PAGE_SEARCH):
-		pg_search(&paths, &req, subpath);
+		pg_search(&req, subpath);
 		break;
 	case (PAGE_SHOW):
-		pg_show(&paths, &req, subpath);
+		pg_show(&req, subpath);
 		break;
 	default:
 		resp_error404(path);
 		break;
 	}
 
-	manpath_free(&paths);
-	kval_free(req.fields, req.fieldsz);
+	for (i = 0; i < (int)req.psz; i++) {
+		free(req.p[i].path);
+		free(req.p[i].name);
+	}
 
+	free(req.p);
 	return(EXIT_SUCCESS);
 }
 
@@ -986,3 +1048,115 @@ cmp(const void *p1, const void *p2)
 				((const struct res *)p2)->title));
 }
 
+/*
+ * Check to see if an "etc" path consists of a catman.conf file.  If it
+ * does, that means that the path contains a tree created by catman(8)
+ * and should be used for indexing.
+ */
+static int
+pathstop(DIR *dir)
+{
+	struct dirent	*d;
+
+	while (NULL != (d = readdir(dir)))
+		if (DT_REG == d->d_type)
+			if (0 == strcmp(d->d_name, "catman.conf"))
+				return(1);
+
+	return(0);
+}
+
+/*
+ * Scan for indexable paths.
+ * This adds all paths with "etc/catman.conf" to the buffer.
+ */
+static void
+pathgen(DIR *dir, char *path, struct req *req)
+{
+	struct dirent	*d;
+	char		*cp;
+	DIR		*cd;
+	int		 rc;
+	size_t		 sz, ssz;
+
+	sz = strlcat(path, "/", MAXPATHLEN);
+	if (sz >= MAXPATHLEN) {
+		fprintf(stderr, "%s: Path too long", path);
+		return;
+	} 
+
+	/* 
+	 * First, scan for the "etc" directory.
+	 * If it's found, then see if it should cause us to stop.  This
+	 * happens when a catman.conf is found in the directory.
+	 */
+
+	rc = 0;
+	while (0 == rc && NULL != (d = readdir(dir))) {
+		if (DT_DIR != d->d_type || strcmp(d->d_name, "etc"))
+			continue;
+
+		path[(int)sz] = '\0';
+		ssz = strlcat(path, d->d_name, MAXPATHLEN);
+
+		if (ssz >= MAXPATHLEN) {
+			fprintf(stderr, "%s: Path too long", path);
+			return;
+		} else if (NULL == (cd = opendir(path))) {
+			perror(path);
+			return;
+		} 
+		
+		rc = pathstop(cd);
+		closedir(cd);
+	}
+
+	if (rc > 0) {
+		/* This also strips the trailing slash. */
+		path[(int)--sz] = '\0';
+		req->p = mandoc_realloc
+			(req->p, 
+			 (req->psz + 1) * sizeof(struct paths));
+		/*
+		 * Strip out the leading "./" unless we're just a ".",
+		 * in which case use an empty string as our name.
+		 */
+		req->p[(int)req->psz].path = mandoc_strdup(path);
+		req->p[(int)req->psz].name = 
+			cp = mandoc_strdup(path + (1 == sz ? 1 : 2));
+		req->psz++;
+		/* 
+		 * The name is just the path with all the slashes taken
+		 * out of it.  Simple but effective. 
+		 */
+		for ( ; '\0' != *cp; cp++) 
+			if ('/' == *cp)
+				*cp = ' ';
+		return;
+	} 
+
+	/*
+	 * If no etc/catman.conf was found, recursively enter child
+	 * directory and continue scanning.
+	 */
+
+	rewinddir(dir);
+	while (NULL != (d = readdir(dir))) {
+		if (DT_DIR != d->d_type || '.' == d->d_name[0])
+			continue;
+
+		path[(int)sz] = '\0';
+		ssz = strlcat(path, d->d_name, MAXPATHLEN);
+
+		if (ssz >= MAXPATHLEN) {
+			fprintf(stderr, "%s: Path too long", path);
+			return;
+		} else if (NULL == (cd = opendir(path))) {
+			perror(path);
+			return;
+		}
+
+		pathgen(cd, path, req);
+		closedir(cd);
+	}
+}