=================================================================== RCS file: /cvs/mandoc/cgi.c,v retrieving revision 1.19 retrieving revision 1.25 diff -u -p -r1.19 -r1.25 --- mandoc/cgi.c 2011/12/08 22:47:09 1.19 +++ mandoc/cgi.c 2011/12/10 22:20:59 1.25 @@ -1,4 +1,4 @@ -/* $Id: cgi.c,v 1.19 2011/12/08 22:47:09 kristaps Exp $ */ +/* $Id: cgi.c,v 1.25 2011/12/10 22:20:59 kristaps Exp $ */ /* * Copyright (c) 2011 Kristaps Dzonsons * @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -54,14 +55,26 @@ enum page { PAGE__MAX }; -struct kval { - char *key; - char *val; +struct paths { + char *name; + char *path; }; +/* + * A query as passed to the search function. + */ +struct query { + const char *arch; /* architecture */ + const char *sec; /* manual section */ + const char *expr; /* unparsed expression string */ + int whatis; /* whether whatis mode */ + int legacy; /* whether legacy mode */ +}; + struct req { - struct kval *fields; - size_t fieldsz; + struct query q; + struct paths *p; + size_t psz; enum page page; }; @@ -71,15 +84,13 @@ static int cmp(const void *, const void *); static void format(const char *); static void html_print(const char *); static void html_putchar(char); -static int kval_decode(char *); -static void kval_parse(struct kval **, size_t *, char *); -static void kval_free(struct kval *, size_t); -static void pg_index(const struct manpaths *, - const struct req *, char *); -static void pg_search(const struct manpaths *, - const struct req *, char *); -static void pg_show(const struct manpaths *, - const struct req *, char *); +static int http_decode(char *); +static void http_parse(struct query *, char *); +static int pathstop(DIR *); +static void pathgen(DIR *, char *, struct req *); +static void pg_index(const struct req *, char *); +static void pg_search(const struct req *, char *); +static void pg_show(const struct req *, char *); static void resp_bad(void); static void resp_baddb(void); static void 
resp_error400(void); @@ -124,6 +135,10 @@ atou(const char *buf, unsigned *v) return(1); } +/* + * Print a character, escaping HTML along the way. + * This will pass non-ASCII straight to output: be warned! + */ static void html_putchar(char c) { @@ -148,8 +163,8 @@ html_putchar(char c) } /* - * Print a word, escaping HTML along the way. - * This will pass non-ASCII straight to output: be warned! + * Call through to html_putchar(). + * Accepts NULL strings. */ static void html_print(const char *p) @@ -161,31 +176,23 @@ html_print(const char *p) html_putchar(*p++); } -static void -kval_free(struct kval *p, size_t sz) -{ - int i; - - for (i = 0; i < (int)sz; i++) { - free(p[i].key); - free(p[i].val); - } - free(p); -} - /* * Parse out key-value pairs from an HTTP request variable. * This can be either a cookie or a POST/GET string, although man.cgi * uses only GET for simplicity. */ static void -kval_parse(struct kval **kv, size_t *kvsz, char *p) +http_parse(struct query *q, char *p) { char *key, *val; - size_t sz, cur; + size_t sz; + int legacy; - cur = 0; + memset(q, 0, sizeof(struct query)); + q->whatis = 1; + legacy = -1; + while (p && '\0' != *p) { while (' ' == *p) p++; @@ -219,21 +226,48 @@ kval_parse(struct kval **kv, size_t *kvsz, char *p) /* Just abort handling. */ - if ( ! kval_decode(key)) - return; - if ( ! kval_decode(val)) - return; + if ( ! http_decode(key)) + break; + if ( ! 
http_decode(val)) + break; - if (*kvsz + 1 >= cur) { - cur++; - *kv = mandoc_realloc - (*kv, cur * sizeof(struct kval)); - } + if (0 == strcmp(key, "expr")) + q->expr = val; + else if (0 == strcmp(key, "query")) + q->expr = val; + else if (0 == strcmp(key, "sec")) + q->sec = val; + else if (0 == strcmp(key, "sektion")) + q->sec = val; + else if (0 == strcmp(key, "arch")) + q->arch = val; + else if (0 == strcmp(key, "apropos")) + legacy = 0 == strcmp(val, "0"); + else if (0 == strcmp(key, "op")) + q->whatis = 0 == strcasecmp(val, "whatis"); + } - (*kv)[(int)*kvsz].key = mandoc_strdup(key); - (*kv)[(int)*kvsz].val = mandoc_strdup(val); - (*kvsz)++; + /* Test for old man.cgi compatibility mode. */ + + if (legacy == 0) { + q->whatis = 0; + q->legacy = 1; + } else if (legacy > 0) { + q->legacy = 1; + q->whatis = 1; } + + /* + * Section "0" means no section when in legacy mode. + * For some man.cgi scripts, "default" arch is none. + */ + + if (q->legacy && NULL != q->sec) + if (0 == strcmp(q->sec, "0")) + q->sec = NULL; + if (q->legacy && NULL != q->arch) + if (0 == strcmp(q->arch, "default")) + q->arch = NULL; } /* @@ -242,7 +276,7 @@ kval_parse(struct kval **kv, size_t *kvsz, char *p) * over the allocated string. 
*/ static int -kval_decode(char *p) +http_decode(char *p) { char hex[3]; int c; @@ -277,9 +311,9 @@ resp_begin_http(int code, const char *msg) if (200 != code) printf("Status: %d %s\n", code, msg); - puts("Content-Type: text/html; charset=utf-8" "\n" - "Cache-Control: no-cache" "\n" - "Pragma: no-cache" "\n" + puts("Content-Type: text/html; charset=utf-8\n" + "Cache-Control: no-cache\n" + "Pragma: no-cache\n" ""); fflush(stdout); @@ -291,18 +325,18 @@ resp_begin_html(int code, const char *msg) resp_begin_http(code, msg); - puts("" "\n" - "" "\n" - " " "\n" - " " "\n" - " " "\n" - " System Manpage Reference" "\n" - " " "\n" - " " "\n" + puts("\n" + "\n" + "\n" + "\n" + "\n" + "System Manpage Reference\n" + "\n" + "\n" ""); } @@ -310,57 +344,39 @@ static void resp_end_html(void) { - puts(" \n"); + puts("\n" + ""); } static void resp_searchform(const struct req *req) { - int i; - const char *expr, *sec, *arch; - expr = sec = arch = ""; - - for (i = 0; i < (int)req->fieldsz; i++) - if (0 == strcmp(req->fields[i].key, "expr")) - expr = req->fields[i].val; - else if (0 == strcmp(req->fields[i].key, "query")) - expr = req->fields[i].val; - else if (0 == strcmp(req->fields[i].key, "sec")) - sec = req->fields[i].val; - else if (0 == strcmp(req->fields[i].key, "sektion")) - sec = req->fields[i].val; - else if (0 == strcmp(req->fields[i].key, "arch")) - arch = req->fields[i].val; - - if (NULL != sec && 0 == strcmp(sec, "0")) - sec = NULL; - puts(""); printf("
\n"); printf("
\n" "Search Parameters\n" - " or \n" - " for manuals satisfying \n" + " or \n" + " for manuals satisfying \n" "q.expr ? req->q.expr : ""); printf("\">, section " - "q.sec ? req->q.sec : ""); printf("\">, arch " - "q.arch ? req->q.arch : ""); puts("\">.\n" "\n" "
\n" - "
\n" - ""); + ""); + puts(""); } static void @@ -379,9 +395,9 @@ resp_error400(void) resp_begin_html(400, "Query Malformed"); printf("

Malformed Query

\n" "

\n" - " The query your entered was malformed.\n" - " Try again from the\n" - " main page\n" + "The query your entered was malformed.\n" + "Try again from the\n" + "main page.\n" "

", progname); resp_end_html(); } @@ -393,13 +409,13 @@ resp_error404(const char *page) resp_begin_html(404, "Not Found"); puts("

Page Not Found

\n" "

\n" - " The page you're looking for, "); - printf(" "); + "The page you're looking for, "); + printf(""); html_print(page); printf(",\n" - " could not be found.\n" - " Try searching from the\n" - " main page\n" + "could not be found.\n" + "Try searching from the\n" + "main page.\n" "

", progname); resp_end_html(); } @@ -424,13 +440,9 @@ resp_baddb(void) static void resp_search(struct res *r, size_t sz, void *arg) { - int i, whatis; - const char *ep, *sec, *arch; + int i; const struct req *req; - whatis = 1; - ep = sec = arch = NULL; - if (1 == sz) { /* * If we have just one result, then jump there now @@ -444,43 +456,26 @@ resp_search(struct res *r, size_t sz, void *arg) return; } - req = (const struct req *)arg; - - for (i = 0; i < (int)req->fieldsz; i++) - if (0 == strcmp(req->fields[i].key, "expr")) - ep = req->fields[i].val; - else if (0 == strcmp(req->fields[i].key, "query")) - ep = req->fields[i].val; - else if (0 == strcmp(req->fields[i].key, "sec")) - sec = req->fields[i].val; - else if (0 == strcmp(req->fields[i].key, "sektion")) - sec = req->fields[i].val; - else if (0 == strcmp(req->fields[i].key, "arch")) - arch = req->fields[i].val; - else if (0 == strcmp(req->fields[i].key, "apropos")) - whatis = 0 == strcmp - (req->fields[i].val, "0"); - else if (0 == strcmp(req->fields[i].key, "op")) - whatis = 0 == strcasecmp - (req->fields[i].val, "whatis"); - qsort(r, sz, sizeof(struct res), cmp); resp_begin_html(200, NULL); + + req = (const struct req *)arg; resp_searchform(req); if (0 == sz) { - puts("

\n" - "No results found."); - if (whatis) { + printf("

\n" + "No %s results found.\n", + req->q.whatis ? "whatis" : "apropos"); + if (req->q.whatis) { printf("(Try q.expr ? req->q.expr : ""); printf("&sec="); - html_print(sec ? sec : ""); + html_print(req->q.sec ? req->q.sec : ""); printf("&arch="); - html_print(arch ? arch : ""); + html_print(req->q.arch ? req->q.arch : ""); puts("\">apropos?)"); } puts("

"); @@ -492,9 +487,11 @@ resp_search(struct res *r, size_t sz, void *arg) ""); for (i = 0; i < (int)sz; i++) { - printf("\n" + ""); + puts("\n" + ""); } puts("
\n" + "\n" + "", r[i].volume, r[i].rec); + printf("/show/0/%u/%u.html\">", r[i].volume, r[i].rec); html_print(r[i].title); putchar('('); html_print(r[i].cat); @@ -502,19 +499,21 @@ resp_search(struct res *r, size_t sz, void *arg) putchar('/'); html_print(r[i].arch); } - printf(")"); + printf(")\n" + ""); html_print(r[i].desc); - puts("
"); - resp_end_html(); } /* ARGSUSED */ static void -pg_index(const struct manpaths *ps, const struct req *req, char *path) +pg_index(const struct req *req, char *path) { resp_index(req); @@ -535,21 +534,21 @@ catman(const char *file) } resp_begin_http(200, NULL); - puts("" "\n" - "" "\n" - " " "\n" - " " "\n" - " " "\n" - " System Manpage Reference" "\n" - " " "\n" - " " "\n" - ""); + puts("\n" + "\n" + "\n" + "\n" + "\n" + "System Manpage Reference\n" + "\n" + "\n" + "\n" + "
");
 
-	puts("
");
 	while (NULL != (p = fgetln(f, &len))) {
 		bold = italic = 0;
 		for (i = 0; i < (int)len - 1; i++) {
@@ -713,42 +712,74 @@ format(const char *file)
 }
 
 static void
-pg_show(const struct manpaths *ps, const struct req *req, char *path)
+pg_show(const struct req *req, char *path)
 {
+	struct manpaths	 ps;
 	char		*sub;
 	char		 file[MAXPATHLEN];
 	const char	*fn, *cp;
 	int		 rc;
-	unsigned int	 vol, rec;
+	unsigned int	 vol, rec, mr;
 	DB		*idx;
 	DBT		 key, val;
 
-	if (NULL == path) {
+	idx = NULL;
+
+	/* Parse out mroot, volume, and record from the path. */
+
+	if (NULL == path || NULL == (sub = strchr(path, '/'))) {
 		resp_error400();
 		return;
-	} else if (NULL == (sub = strrchr(path, '/'))) {
+	} 
+	*sub++ = '\0';
+	if ( ! atou(path, &mr)) {
 		resp_error400();
 		return;
-	} else
-		*sub++ = '\0';
-
-	if ( ! (atou(path, &vol) && atou(sub, &rec))) {
+	}
+	path = sub;
+	if (NULL == (sub = strchr(path, '/'))) {
 		resp_error400();
 		return;
-	} else if (vol >= (unsigned int)ps->sz) {
+	}
+	*sub++ = '\0';
+	if ( ! atou(path, &vol) || ! atou(sub, &rec)) {
 		resp_error400();
 		return;
+	} else if (mr >= (unsigned int)req->psz) {
+		resp_error400();
+		return;
 	}
 
-	strlcpy(file, ps->paths[vol], MAXPATHLEN);
+	/*
+	 * Begin by chdir()ing into the root of the manpath.
+	 * This way we can pick up the database files, which are
+	 * relative to the manpath root.
+	 */
+
+	if (-1 == chdir(req->p[(int)mr].path)) {
+		perror(req->p[(int)mr].path);
+		resp_baddb();
+		return;
+	}
+
+	memset(&ps, 0, sizeof(struct manpaths));
+	manpath_manconf("etc/catman.conf", &ps);
+
+	if (vol >= (unsigned int)ps.sz) {
+		resp_error400();
+		goto out;
+	}
+
+	strlcpy(file, ps.paths[vol], MAXPATHLEN);
 	strlcat(file, "/mandoc.index", MAXPATHLEN);
 
 	/* Open the index recno(3) database. */
 
 	idx = dbopen(file, O_RDONLY, 0, DB_RECNO, NULL);
 	if (NULL == idx) {
+		perror(file);
 		resp_baddb();
-		return;
+		goto out;
 	}
 
 	key.data = &rec;
@@ -768,60 +799,59 @@ pg_show(const struct manpaths *ps, const struct req *r
 	else if (NULL == memchr(fn, '\0', val.size - (fn - cp)))
 		resp_baddb();
 	else {
-		strlcpy(file, cache, MAXPATHLEN);
-		strlcat(file, "/", MAXPATHLEN);
-		strlcat(file, fn, MAXPATHLEN);
 		if (0 == strcmp(cp, "cat"))
-			catman(file);
+			catman(fn + 1);
 		else
-			format(file);
+			format(fn + 1);
 	}
 out:
-	(*idx->close)(idx);
+	if (idx)
+		(*idx->close)(idx);
+	manpath_free(&ps);
 }
 
 static void
-pg_search(const struct manpaths *ps, const struct req *req, char *path)
+pg_search(const struct req *req, char *path)
 {
 	size_t		  tt;
-	int		  i, sz, rc, whatis;
+	struct manpaths	  ps;
+	int		  i, sz, rc;
 	const char	 *ep, *start;
 	char		**cp;
 	struct opts	  opt;
 	struct expr	 *expr;
 
-	expr = NULL;
-	cp = NULL;
-	ep = NULL;
-	sz = 0;
-	whatis = 1;
+	if (0 == req->psz) {
+		resp_search(NULL, 0, (void *)req);
+		return;
+	}
 
 	memset(&opt, 0, sizeof(struct opts));
 
-	for (sz = i = 0; i < (int)req->fieldsz; i++)
-		if (0 == strcmp(req->fields[i].key, "expr"))
-			ep = req->fields[i].val;
-		else if (0 == strcmp(req->fields[i].key, "query"))
-			ep = req->fields[i].val;
-		else if (0 == strcmp(req->fields[i].key, "sec"))
-			opt.cat = req->fields[i].val;
-		else if (0 == strcmp(req->fields[i].key, "sektion"))
-			opt.cat = req->fields[i].val;
-		else if (0 == strcmp(req->fields[i].key, "arch"))
-			opt.arch = req->fields[i].val;
-		else if (0 == strcmp(req->fields[i].key, "apropos"))
-			whatis = 0 == strcmp
-				(req->fields[i].val, "0");
-		else if (0 == strcmp(req->fields[i].key, "op"))
-			whatis = 0 == strcasecmp
-				(req->fields[i].val, "whatis");
+	ep 	 = req->q.expr;
+	opt.arch = req->q.arch;
+	opt.cat  = req->q.sec;
+	rc 	 = -1;
+	sz 	 = 0;
+	cp	 = NULL;
 
-	if (NULL != opt.cat && 0 == strcmp(opt.cat, "0"))
-		opt.cat = NULL;
+	/*
+	 * Begin by chdir()ing into the root of the manpath.
+	 * This way we can pick up the database files, which are
+	 * relative to the manpath root.
+	 */
 
+	if (-1 == (chdir(req->p[0].path))) {
+		perror(req->p[0].path);
+		resp_search(NULL, 0, (void *)req);
+		return;
+	}
+
+	memset(&ps, 0, sizeof(struct manpaths));
+	manpath_manconf("etc/catman.conf", &ps);
+
 	/*
-	 * Poor man's tokenisation.
-	 * Just break apart by spaces.
+	 * Poor man's tokenisation: just break apart by spaces.
 	 * Yes, this is half-ass.  But it works for now.
 	 */
 
@@ -840,19 +870,17 @@ pg_search(const struct manpaths *ps, const struct req 
 			ep++;
 	}
 
-	rc = -1;
-
 	/*
 	 * Pump down into apropos backend.
 	 * The resp_search() function is called with the results.
 	 */
 
-	expr = whatis ? termcomp(sz, cp, &tt) :
-		        exprcomp(sz, cp, &tt);
+	expr = req->q.whatis ? 
+		termcomp(sz, cp, &tt) : exprcomp(sz, cp, &tt);
 
 	if (NULL != expr)
 		rc = apropos_search
-			(ps->sz, ps->paths, &opt,
+			(ps.sz, ps.paths, &opt,
 			 expr, tt, (void *)req, resp_search);
 
 	/* ...unless errors occured. */
@@ -867,17 +895,19 @@ pg_search(const struct manpaths *ps, const struct req 
 
 	free(cp);
 	exprfree(expr);
+	manpath_free(&ps);
 }
 
 int
 main(void)
 {
 	int		 i;
+	char		 buf[MAXPATHLEN];
+	DIR		*cwd;
 	struct req	 req;
 	char		*p, *path, *subpath;
-	struct manpaths	 paths;
 
-	/* HTTP init: read and parse the query string. */
+	/* Scan our run-time environment. */
 
 	progname = getenv("SCRIPT_NAME");
 	if (NULL == progname)
@@ -887,21 +917,43 @@ main(void)
 	if (NULL == cache)
 		cache = "/cache/man.cgi";
 
-	if (-1 == chdir(cache)) {
-		resp_bad();
-		return(EXIT_FAILURE);
-	}
-
 	host = getenv("HTTP_HOST");
 	if (NULL == host)
 		host = "localhost";
 
+	/*
+	 * First we change directory into the cache directory so that
+	 * subsequent scanning for manpath directories is rooted
+	 * relative to the same position.
+	 */
+
+	if (-1 == chdir(cache)) {
+		perror(cache);
+		resp_bad();
+		return(EXIT_FAILURE);
+	} else if (NULL == (cwd = opendir(cache))) {
+		perror(cache);
+		resp_bad();
+		return(EXIT_FAILURE);
+	} 
+
 	memset(&req, 0, sizeof(struct req));
 
+	strlcpy(buf, ".", MAXPATHLEN);
+	pathgen(cwd, buf, &req);
+	closedir(cwd);
+
+	/* Next parse out the query string. */
+
 	if (NULL != (p = getenv("QUERY_STRING")))
-		kval_parse(&req.fields, &req.fieldsz, p);
+		http_parse(&req.q, p);
 
-	/* Resolve leading subpath component. */
+	/*
+	 * Now juggle paths to extract information.
+	 * We want to extract our filetype (the file suffix), the
+	 * initial path component, then the trailing component(s).
+	 * Start with leading subpath component. 
+	 */
 
 	subpath = path = NULL;
 	req.page = PAGE__MAX;
@@ -932,31 +984,29 @@ main(void)
 				break;
 			}
 
-	/* Initialise MANPATH. */
-
-	memset(&paths, 0, sizeof(struct manpaths));
-	manpath_manconf("etc/catman.conf", &paths);
-
 	/* Route pages. */
 
 	switch (req.page) {
 	case (PAGE_INDEX):
-		pg_index(&paths, &req, subpath);
+		pg_index(&req, subpath);
 		break;
 	case (PAGE_SEARCH):
-		pg_search(&paths, &req, subpath);
+		pg_search(&req, subpath);
 		break;
 	case (PAGE_SHOW):
-		pg_show(&paths, &req, subpath);
+		pg_show(&req, subpath);
 		break;
 	default:
 		resp_error404(path);
 		break;
 	}
 
-	manpath_free(&paths);
-	kval_free(req.fields, req.fieldsz);
+	for (i = 0; i < (int)req.psz; i++) {
+		free(req.p[i].path);
+		free(req.p[i].name);
+	}
 
+	free(req.p);
 	return(EXIT_SUCCESS);
 }
 
@@ -968,3 +1018,112 @@ cmp(const void *p1, const void *p2)
 				((const struct res *)p2)->title));
 }
 
+/*
+ * Scan "dir" (an already-open "etc" directory) for a regular file named
+ * "catman.conf", which marks a tree created by catman(8): return 1 if
+ * found, else 0.  Matches on d_type == DT_REG only; dir is not closed here.
+ */
+static int
+pathstop(DIR *dir)
+{
+	struct dirent	*d;
+
+	while (NULL != (d = readdir(dir)))
+		if (DT_REG == d->d_type)
+			if (0 == strcmp(d->d_name, "catman.conf"))
+				return(1);
+
+	return(0);
+}
+
+/*
+ * Recursively scan open directory "dir" (named by the MAXPATHLEN buffer
+ * "path", edited in place); append each "etc/catman.conf" tree to req->p.
+ */
+static void
+pathgen(DIR *dir, char *path, struct req *req)
+{
+	struct dirent	*d;
+	char		*cp;
+	DIR		*cd;
+	int		 rc;
+	size_t		 sz, ssz;
+
+	sz = strlcat(path, "/", MAXPATHLEN);
+	if (sz >= MAXPATHLEN) { /* the slash did not fit */
+		fprintf(stderr, "%s: Path too long", path); /* no newline */
+		return;
+	} 
+
+	/* 
+	 * First, look for a child directory named "etc" (by d_type).
+	 * If pathstop() finds a catman.conf in it, rc becomes nonzero,
+	 * the loop ends, and this directory is recorded below.
+	 */
+
+	rc = 0;
+	while (0 == rc && NULL != (d = readdir(dir))) {
+		if (DT_DIR != d->d_type || strcmp(d->d_name, "etc"))
+			continue;
+
+		path[(int)sz] = '\0'; /* cut back to just after the slash */
+		ssz = strlcat(path, d->d_name, MAXPATHLEN);
+
+		if (ssz >= MAXPATHLEN) {
+			fprintf(stderr, "%s: Path too long", path);
+			return;
+		} else if (NULL == (cd = opendir(path))) {
+			perror(path);
+			return;
+		} 
+		
+		rc = pathstop(cd);
+		closedir(cd);
+	}
+
+	if (rc > 0) {
+		/* Cut path back to this directory, dropping the slash too. */
+		path[(int)sz - 1] = '\0';
+		req->p = mandoc_realloc
+			(req->p, 
+			 (req->psz + 1) * sizeof(struct paths));
+		req->p[(int)req->psz].path = mandoc_strdup(path);
+		/* And this skips over the leading "./" of the path. */
+		req->p[(int)req->psz].name = 
+			cp = mandoc_strdup(path + 2);
+		req->psz++;
+		/* 
+		 * The name is the path with each slash turned into a space.
+		 * NOTE(review): if path is just ".", path + 2 is past its NUL.
+		 */
+		for ( ; '\0' != *cp; cp++) 
+			if ('/' == *cp)
+				*cp = ' ';
+		return; /* a matching directory is not descended into */
+	} 
+
+	/*
+	 * No etc/catman.conf here: rewind and recurse into every child
+	 * directory whose name does not begin with a dot.
+	 */
+
+	rewinddir(dir);
+	while (NULL != (d = readdir(dir))) {
+		if (DT_DIR != d->d_type || '.' == d->d_name[0])
+			continue;
+
+		path[(int)sz] = '\0'; /* cut back to just after the slash */
+		ssz = strlcat(path, d->d_name, MAXPATHLEN);
+
+		if (ssz >= MAXPATHLEN) {
+			fprintf(stderr, "%s: Path too long", path);
+			return;
+		} else if (NULL == (cd = opendir(path))) {
+			perror(path);
+			return; /* NOTE(review): remaining siblings are skipped too */
+		}
+
+		pathgen(cd, path, req);
+		closedir(cd);
+	}
+}