Annotation of mandoc/cgi.c, Revision 1.7
1.7 ! kristaps 1: /* $Id: cgi.c,v 1.6 2011/11/23 10:01:04 kristaps Exp $ */
1.6 kristaps 2: /*
3: * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #ifdef HAVE_CONFIG_H
18: #include "config.h"
19: #endif
20:
21: #include <sys/param.h>
22: #include <sys/wait.h>
23:
1.1 kristaps 24: #include <assert.h>
1.6 kristaps 25: #include <ctype.h>
26: #include <errno.h>
1.1 kristaps 27: #include <fcntl.h>
1.6 kristaps 28: #include <limits.h>
1.1 kristaps 29: #include <regex.h>
30: #include <stdio.h>
31: #include <stdarg.h>
1.5 kristaps 32: #include <stdint.h>
1.1 kristaps 33: #include <stdlib.h>
34: #include <string.h>
1.6 kristaps 35: #include <unistd.h>
1.1 kristaps 36:
1.6 kristaps 37: #include "apropos_db.h"
1.4 schwarze 38: #include "mandoc.h"
1.6 kristaps 39: #include "manpath.h"
40:
41: #ifdef __linux__
42: # include <db_185.h>
43: #else
44: # include <db.h>
45: #endif
1.1 kristaps 46:
/*
 * Pages this CGI can serve.
 * PAGE__MAX counts the real pages; main() also uses it as the
 * "nothing matched" value before resolving PATH_INFO.
 */
enum	page {
	PAGE_INDEX = 0,
	PAGE_SEARCH,
	PAGE_SHOW,
	PAGE__MAX
};
53:
/*
 * A single key-value pair taken from the request.
 * Both strings are heap copies made by kval_parse() and released
 * by kval_free().
 */
struct	kval {
	char		*key; /* field name */
	char		*val; /* field value */
};
58:
59: struct req {
1.6 kristaps 60: struct kval *fields;
1.1 kristaps 61: size_t fieldsz;
62: enum page page;
63: };
64:
/* Forward declarations of the file-local helpers. */
static	int		 atou(const char *buf, unsigned *v);
static	void		 format_insecure(const char *file);
static	void		 format_secure(const char *file);
static	void		 html_print(const char *p);
static	int		 kval_decode(char *p);
static	void		 kval_parse(struct kval **kv, size_t *kvsz, char *p);
static	void		 kval_free(struct kval *p, size_t sz);
static	void		 pg_index(const struct manpaths *ps,
				const struct req *req, char *path);
static	void		 pg_search(const struct manpaths *ps,
				const struct req *req, char *path);
static	void		 pg_show(const struct manpaths *ps,
				const struct req *req, char *path);
static	void		 resp_bad(void);
static	void		 resp_baddb(void);
static	void		 resp_badexpr(const struct req *req);
static	void		 resp_badmanual(void);
static	void		 resp_begin_html(int code, const char *msg);
static	void		 resp_begin_http(int code, const char *msg);
static	void		 resp_end_html(void);
static	void		 resp_index(const struct req *req);
static	void		 resp_search(struct res *r, size_t sz, void *arg);
static	void		 resp_searchform(const struct req *req);
88:
/*
 * Process-wide settings, all initialised from the environment in main().
 */
static	int		 insecure = 1; /* cleared when INSECURE is unset */
static	const char	*progname; /* SCRIPT_NAME, or "" */
static	const char	*cache; /* CACHE_DIR, or "/cache/man.cgi" */
static	const char	*host; /* HTTP_HOST, or "localhost" */
1.1 kristaps 93:
94: static const char * const pages[PAGE__MAX] = {
95: "index", /* PAGE_INDEX */
96: "search", /* PAGE_SEARCH */
1.6 kristaps 97: "show", /* PAGE_SHOW */
1.1 kristaps 98: };
99:
/*
 * Convert a decimal string into an unsigned integer.
 * Follows the strtol(3) checking recipe rather than relying on
 * strtonum(3), which is not available everywhere.
 * Returns 1 and stores the value in *v on success; returns 0 (leaving
 * *v untouched) if the string is empty, has trailing characters, or
 * is outside [0, UINT_MAX].
 */
static int
atou(const char *buf, unsigned *v)
{
	char		*end;
	long		 num;

	errno = 0;
	num = strtol(buf, &end, 10);

	/* Empty input or trailing junk. */
	if ('\0' == buf[0] || '\0' != *end)
		return(0);

	/* Did not fit into a long. */
	if (ERANGE == errno && (LONG_MAX == num || LONG_MIN == num))
		return(0);

	/* Does not fit into an unsigned int. */
	if (num > UINT_MAX || num < 0)
		return(0);

	*v = (unsigned int)num;
	return(1);
}
1.1 kristaps 122:
/*
 * Print a word to stdout, replacing the HTML-special characters
 * '"', '&', '<' and '>' with their character entities so that the
 * text is safe inside both element content and double-quoted
 * attribute values.
 * A NULL pointer prints nothing.
 * This will pass non-ASCII straight to output: be warned!
 */
static void
html_print(const char *p)
{

	if (NULL == p)
		return;

	for ( ; '\0' != *p; p++)
		switch (*p) {
		case ('"'):
			fputs("&quot;", stdout);
			break;
		case ('&'):
			fputs("&amp;", stdout);
			break;
		case ('>'):
			fputs("&gt;", stdout);
			break;
		case ('<'):
			fputs("&lt;", stdout);
			break;
		default:
			putchar((unsigned char)*p);
			break;
		}
}
154:
155: static void
156: kval_free(struct kval *p, size_t sz)
157: {
158: int i;
159:
160: for (i = 0; i < (int)sz; i++) {
161: free(p[i].key);
162: free(p[i].val);
163: }
164: free(p);
165: }
166:
167: /*
168: * Parse out key-value pairs from an HTTP request variable.
1.6 kristaps 169: * This can be either a cookie or a POST/GET string, although man.cgi
170: * uses only GET for simplicity.
1.1 kristaps 171: */
172: static void
173: kval_parse(struct kval **kv, size_t *kvsz, char *p)
174: {
175: char *key, *val;
176: size_t sz, cur;
177:
178: cur = 0;
179:
180: while (p && '\0' != *p) {
181: while (' ' == *p)
182: p++;
183:
184: key = p;
185: val = NULL;
186:
187: if (NULL != (p = strchr(p, '='))) {
188: *p++ = '\0';
189: val = p;
190:
191: sz = strcspn(p, ";&");
192: /* LINTED */
193: p += sz;
194:
195: if ('\0' != *p)
196: *p++ = '\0';
197: } else {
198: p = key;
199: sz = strcspn(p, ";&");
200: /* LINTED */
201: p += sz;
202:
203: if ('\0' != *p)
204: p++;
205: continue;
206: }
207:
208: if ('\0' == *key || '\0' == *val)
209: continue;
210:
211: /* Just abort handling. */
212:
213: if ( ! kval_decode(key))
214: return;
215: if ( ! kval_decode(val))
216: return;
217:
218: if (*kvsz + 1 >= cur) {
219: cur++;
220: *kv = mandoc_realloc
221: (*kv, cur * sizeof(struct kval));
222: }
223:
224: (*kv)[(int)*kvsz].key = mandoc_strdup(key);
225: (*kv)[(int)*kvsz].val = mandoc_strdup(val);
226: (*kvsz)++;
227: }
228: }
229:
230: /*
1.6 kristaps 231: * HTTP-decode a string. The standard explanation is that this turns
232: * "%4e+foo" into "n foo" in the regular way. This is done in-place
233: * over the allocated string.
1.1 kristaps 234: */
235: static int
236: kval_decode(char *p)
237: {
238: char hex[3];
239: int c;
240:
241: hex[2] = '\0';
242:
243: for ( ; '\0' != *p; p++) {
244: if ('%' == *p) {
245: if ('\0' == (hex[0] = *(p + 1)))
246: return(0);
247: if ('\0' == (hex[1] = *(p + 2)))
248: return(0);
249: if (1 != sscanf(hex, "%x", &c))
250: return(0);
251: if ('\0' == c)
252: return(0);
253:
254: *p = (char)c;
255: memmove(p + 1, p + 3, strlen(p + 3) + 1);
256: } else
257: *p = '+' == *p ? ' ' : *p;
258: }
259:
260: *p = '\0';
261: return(1);
262: }
263:
/*
 * Emit the CGI response header block and flush it.
 * A "Status:" line is written only for codes other than 200; msg is
 * the reason phrase for that line and is not used when code is 200.
 */
static void
resp_begin_http(int code, const char *msg)
{

	if (code != 200)
		printf("Status: %d %s\n", code, msg);

	/* puts() supplies the final newline that ends the header block. */

	puts("Content-Type: text/html; charset=utf-8" "\n"
	     "Cache-Control: no-cache" "\n"
	     "Pragma: no-cache" "\n"
	     "");

	fflush(stdout);
}
278:
/*
 * Start an HTML response: the HTTP header block for the given status,
 * followed by the document prologue up to and including <BODY>.
 * Pair with resp_end_html().
 */
static void
resp_begin_html(int code, const char *msg)
{

	resp_begin_http(code, msg);

	puts("<!DOCTYPE HTML PUBLIC " "\n"
	     " \"-//W3C//DTD HTML 4.01//EN\"" "\n"
	     " \"http://www.w3.org/TR/html4/strict.dtd\">" "\n"
	     "<HTML>" "\n"
	     " <HEAD>" "\n"
	     " <TITLE>System Manpage Reference</TITLE>" "\n"
	     " </HEAD>" "\n"
	     " <BODY>" "\n"
	     "<!-- Begin page content. //-->");
}
295:
/*
 * Close the BODY and HTML elements opened by resp_begin_html().
 */
static void
resp_end_html(void)
{

	puts(" </BODY>\n</HTML>");
}
302:
303: static void
304: resp_searchform(const struct req *req)
305: {
306: int i;
307: const char *expr, *sec, *arch;
308:
309: expr = sec = arch = "";
310:
311: for (i = 0; i < (int)req->fieldsz; i++)
312: if (0 == strcmp(req->fields[i].key, "expr"))
313: expr = req->fields[i].val;
314: else if (0 == strcmp(req->fields[i].key, "sec"))
315: sec = req->fields[i].val;
316: else if (0 == strcmp(req->fields[i].key, "arch"))
317: arch = req->fields[i].val;
318:
319: puts("<!-- Begin search form. //-->");
320: printf("<FORM ACTION=\"");
321: html_print(progname);
322: printf("/search\" METHOD=\"get\">\n");
323: puts(" <FIELDSET>" "\n"
324: " <INPUT TYPE=\"submit\" VALUE=\"Search:\">");
325: printf(" Terms: <INPUT TYPE=\"text\" "
326: "SIZE=\"60\" NAME=\"expr\" VALUE=\"");
327: html_print(expr);
328: puts("\">");
329: printf(" Section: <INPUT TYPE=\"text\" "
330: "SIZE=\"4\" NAME=\"sec\" VALUE=\"");
331: html_print(sec);
332: puts("\">");
333: printf(" Arch: <INPUT TYPE=\"text\" "
334: "SIZE=\"8\" NAME=\"arch\" VALUE=\"");
335: html_print(arch);
336: puts("\">");
337: puts(" </FIELDSET>\n</FORM>\n<!-- End search form. //-->");
338: }
339:
/*
 * Respond with the index page: a 200 document that contains only the
 * search form.
 */
static void
resp_index(const struct req *req)
{

	resp_begin_html(200, NULL);
	resp_searchform(req);
	resp_end_html();
}
348:
/*
 * Respond with a 404 page saying that the requested manual was not
 * found.
 */
static void
resp_badmanual(void)
{

	resp_begin_html(404, "Not Found");
	puts("<P>Requested manual not found.</P>");
	resp_end_html();
}
357:
/*
 * Respond to a search that could not be run: a 200 page showing the
 * search form again (pre-filled from the request) and a short notice.
 */
static void
resp_badexpr(const struct req *req)
{

	resp_begin_html(200, NULL);
	resp_searchform(req);
	puts("<P>Your search didn't work.</P>");
	resp_end_html();
}
1.1 kristaps 367:
/*
 * Respond with a generic 500 page for failures that have no more
 * specific response.
 */
static void
resp_bad(void)
{

	resp_begin_html(500, "Internal Server Error");
	puts("<P>Generic badness happened.</P>");
	resp_end_html();
}
! 375:
/*
 * Respond with a 500 page reporting a broken database.
 */
static void
resp_baddb(void)
{

	resp_begin_html(500, "Internal Server Error");
	puts("<P>Your database is broken.</P>");
	resp_end_html();
}
384:
385: static void
1.6 kristaps 386: resp_search(struct res *r, size_t sz, void *arg)
1.1 kristaps 387: {
388: int i;
389:
1.6 kristaps 390: if (1 == sz) {
391: /*
392: * If we have just one result, then jump there now
393: * without any delay.
394: */
395: puts("Status: 303 See Other");
396: printf("Location: http://%s%s/show/%u/%u.html\n",
397: host, progname,
398: r[0].volume, r[0].rec);
399: puts("Content-Type: text/html; charset=utf-8\n");
400: return;
401: }
402:
403: resp_begin_html(200, NULL);
404: resp_searchform((const struct req *)arg);
405:
406: if (0 == sz)
407: puts("<P>No results found.</P>");
1.1 kristaps 408:
409: for (i = 0; i < (int)sz; i++) {
1.6 kristaps 410: printf("<P><A HREF=\"");
411: html_print(progname);
412: printf("/show/%u/%u.html\">", r[i].volume, r[i].rec);
413: html_print(r[i].title);
1.1 kristaps 414: putchar('(');
1.6 kristaps 415: html_print(r[i].cat);
416: if (r[i].arch && '\0' != *r[i].arch) {
417: putchar('/');
418: html_print(r[i].arch);
419: }
420: printf(")</A> ");
421: html_print(r[i].desc);
422: puts("</P>");
1.1 kristaps 423: }
1.6 kristaps 424:
425: resp_end_html();
426: }
427:
/*
 * Page handler for the index page.
 * The manpath set and the subpath are accepted only so that the
 * signature matches the other page handlers; neither is used.
 */
/* ARGSUSED */
static void
pg_index(const struct manpaths *ps, const struct req *req, char *path)
{

	(void)ps;
	(void)path;

	resp_index(req);
}
435:
436: static void
1.7 ! kristaps 437: format_insecure(const char *file)
! 438: {
! 439: pid_t pid;
! 440: char cmd[MAXPATHLEN];
! 441:
! 442: strlcpy(cmd, "man=", MAXPATHLEN);
! 443: strlcat(cmd, progname, MAXPATHLEN);
! 444: strlcat(cmd, "/search?expr=%N&sec=%S", MAXPATHLEN);
! 445:
! 446: /* Get ready to call the child mandoc(1) process. */
! 447:
! 448: if (-1 == (pid = fork()))
! 449: exit(EXIT_FAILURE);
! 450:
! 451: if (pid > 0) {
! 452: waitpid(pid, NULL, 0);
! 453: return;
! 454: }
! 455:
! 456: dup2(STDOUT_FILENO, STDERR_FILENO);
! 457:
! 458: puts("Content-Type: text/html; charset=utf-8\n");
! 459:
! 460: fflush(stdout);
! 461:
! 462: execlp("mandoc", "mandoc", "-T",
! 463: "html", "-O", cmd, file, (char *)NULL);
! 464: }
! 465:
/*
 * Serve a pre-formatted manual: write the HTTP header block, then
 * copy the file verbatim to standard output.
 * If the file cannot be opened, the "broken database" page is sent
 * instead.  A read or write error after the headers have gone out
 * simply ends the copy.
 */
static void
format_secure(const char *file)
{
	char		 buf[BUFSIZ];
	int		 fd;
	ssize_t		 ssz, wsz, off;

	if (-1 == (fd = open(file, O_RDONLY, 0))) {
		resp_baddb();
		return;
	}

	/* This also flushes stdio, so the raw writes below stay ordered. */

	resp_begin_http(200, NULL);

	for (;;) {
		ssz = read(fd, buf, sizeof(buf));
		if (-1 == ssz && EINTR == errno)
			continue;
		if (ssz <= 0)
			break;

		/* write(2) may accept less than asked: loop until done. */

		for (off = 0; off < ssz; off += wsz) {
			wsz = write(STDOUT_FILENO,
			    buf + off, (size_t)(ssz - off));
			if (-1 != wsz)
				continue;
			if (EINTR != errno) {
				close(fd);
				return;
			}
			wsz = 0;
		}
	}

	close(fd);
}
! 488:
! 489: static void
1.6 kristaps 490: pg_show(const struct manpaths *ps, const struct req *req, char *path)
1.1 kristaps 491: {
1.6 kristaps 492: char *sub;
1.7 ! kristaps 493: char file[MAXPATHLEN];
1.6 kristaps 494: int rc;
495: unsigned int vol, rec;
496: DB *db;
497: DBT key, val;
498:
499: if (NULL == path) {
500: resp_badmanual();
501: return;
502: } else if (NULL == (sub = strrchr(path, '/'))) {
503: resp_badmanual();
504: return;
505: } else
506: *sub++ = '\0';
507:
508: if ( ! (atou(path, &vol) && atou(sub, &rec))) {
509: resp_badmanual();
510: return;
511: } else if (vol >= (unsigned int)ps->sz) {
512: resp_badmanual();
513: return;
514: }
515:
516: strlcpy(file, ps->paths[vol], MAXPATHLEN);
517: strlcat(file, "/mandoc.index", MAXPATHLEN);
518:
519: /* Open the index recno(3) database. */
520:
521: db = dbopen(file, O_RDONLY, 0, DB_RECNO, NULL);
522: if (NULL == db) {
523: resp_baddb();
524: return;
525: }
526:
527: key.data = &rec;
528: key.size = 4;
529:
530: if (0 != (rc = (*db->get)(db, &key, &val, 0))) {
531: rc < 0 ? resp_baddb() : resp_badmanual();
532: (*db->close)(db);
533: return;
534: }
535:
536: /* Extra filename: the first nil-terminated entry. */
537:
1.7 ! kristaps 538: (*db->close)(db);
! 539:
1.6 kristaps 540: strlcpy(file, ps->paths[vol], MAXPATHLEN);
541: strlcat(file, "/", MAXPATHLEN);
542: strlcat(file, (char *)val.data, MAXPATHLEN);
543:
1.7 ! kristaps 544: if ( ! insecure) {
! 545: strlcat(file, ".html", MAXPATHLEN);
! 546: format_secure(file);
! 547: } else
! 548: format_insecure(file);
1.6 kristaps 549: }
550:
551: static void
552: pg_search(const struct manpaths *ps, const struct req *req, char *path)
553: {
554: size_t tt;
555: int i, sz, rc;
556: const char *ep, *start;
557: char **cp;
558: struct opts opt;
559: struct expr *expr;
560:
561: expr = NULL;
562: cp = NULL;
563: ep = NULL;
564: sz = 0;
1.1 kristaps 565:
566: memset(&opt, 0, sizeof(struct opts));
1.6 kristaps 567:
568: for (sz = i = 0; i < (int)req->fieldsz; i++)
569: if (0 == strcmp(req->fields[i].key, "expr"))
570: ep = req->fields[i].val;
571: else if (0 == strcmp(req->fields[i].key, "sec"))
572: opt.cat = req->fields[i].val;
573: else if (0 == strcmp(req->fields[i].key, "arch"))
574: opt.arch = req->fields[i].val;
575:
576: /*
577: * Poor man's tokenisation.
578: * Just break apart by spaces.
579: * Yes, this is half-ass. But it works for now.
580: */
581:
582: while (ep && isspace((unsigned char)*ep))
583: ep++;
584:
585: while (ep && '\0' != *ep) {
586: cp = mandoc_realloc(cp, (sz + 1) * sizeof(char *));
587: start = ep;
588: while ('\0' != *ep && ! isspace((unsigned char)*ep))
589: ep++;
590: cp[sz] = mandoc_malloc((ep - start) + 1);
591: memcpy(cp[sz], start, ep - start);
592: cp[sz++][ep - start] = '\0';
593: while (isspace((unsigned char)*ep))
594: ep++;
595: }
596:
597: rc = -1;
598:
599: /*
600: * Pump down into apropos backend.
601: * The resp_search() function is called with the results.
602: */
603:
604: if (NULL != (expr = exprcomp(sz, cp, &tt)))
605: rc = apropos_search
606: (ps->sz, ps->paths, &opt,
607: expr, tt, (void *)req, resp_search);
608:
609: /* ...unless errors occured. */
610:
611: if (0 == rc)
612: resp_baddb();
613: else if (-1 == rc)
614: resp_badexpr(req);
615:
616: for (i = 0; i < sz; i++)
617: free(cp[i]);
618:
619: free(cp);
620: exprfree(expr);
1.1 kristaps 621: }
622:
623: int
624: main(void)
625: {
626: int i;
627: struct req req;
1.6 kristaps 628: char *p, *path, *subpath;
629: struct manpaths paths;
630:
631: /* HTTP init: read and parse the query string. */
632:
633: progname = getenv("SCRIPT_NAME");
634: if (NULL == progname)
635: progname = "";
636:
1.7 ! kristaps 637: cache = getenv("CACHE_DIR");
! 638: if (NULL == cache)
! 639: cache = "/cache/man.cgi";
! 640:
! 641: if (NULL == getenv("INSECURE")) {
! 642: insecure = 0;
! 643: if (-1 == chdir(cache)) {
! 644: resp_bad();
! 645: return(EXIT_FAILURE);
! 646: }
! 647: }
! 648:
1.6 kristaps 649: host = getenv("HTTP_HOST");
650: if (NULL == host)
651: host = "localhost";
1.1 kristaps 652:
653: memset(&req, 0, sizeof(struct req));
654:
655: if (NULL != (p = getenv("QUERY_STRING")))
656: kval_parse(&req.fields, &req.fieldsz, p);
657:
1.6 kristaps 658: /* Resolve leading subpath component. */
1.1 kristaps 659:
1.6 kristaps 660: subpath = path = NULL;
1.1 kristaps 661: req.page = PAGE__MAX;
662:
663: if (NULL == (path = getenv("PATH_INFO")) || '\0' == *path)
664: req.page = PAGE_INDEX;
1.6 kristaps 665:
1.1 kristaps 666: if (NULL != path && '/' == *path && '\0' == *++path)
667: req.page = PAGE_INDEX;
668:
1.6 kristaps 669: /* Strip file suffix. */
670:
671: if (NULL != path && NULL != (p = strrchr(path, '.')))
672: if (NULL != p && NULL == strchr(p, '/'))
673: *p++ = '\0';
674:
675: /* Resolve subpath component. */
1.1 kristaps 676:
677: if (NULL != path && NULL != (subpath = strchr(path, '/')))
1.6 kristaps 678: *subpath++ = '\0';
1.1 kristaps 679:
1.6 kristaps 680: /* Map path into one we recognise. */
1.1 kristaps 681:
682: if (NULL != path && '\0' != *path)
683: for (i = 0; i < (int)PAGE__MAX; i++)
684: if (0 == strcmp(pages[i], path)) {
685: req.page = (enum page)i;
686: break;
687: }
688:
1.6 kristaps 689: /* Initialise MANPATH. */
690:
691: memset(&paths, 0, sizeof(struct manpaths));
1.7 ! kristaps 692: if ( ! insecure)
! 693: manpath_manconf("etc/man.conf", &paths);
! 694: else
! 695: manpath_parse(&paths, NULL, NULL);
1.6 kristaps 696:
697: /* Route pages. */
698:
1.1 kristaps 699: switch (req.page) {
700: case (PAGE_INDEX):
1.6 kristaps 701: pg_index(&paths, &req, subpath);
1.1 kristaps 702: break;
703: case (PAGE_SEARCH):
1.6 kristaps 704: pg_search(&paths, &req, subpath);
705: break;
706: case (PAGE_SHOW):
707: pg_show(&paths, &req, subpath);
1.1 kristaps 708: break;
709: default:
710: break;
711: }
712:
1.6 kristaps 713: manpath_free(&paths);
1.1 kristaps 714: kval_free(req.fields, req.fieldsz);
1.6 kristaps 715:
1.1 kristaps 716: return(EXIT_SUCCESS);
717: }
CVSweb