Annotation of mandoc/cgi.c, Revision 1.8
1.8 ! kristaps 1: /* $Id: cgi.c,v 1.7 2011/11/24 12:27:18 kristaps Exp $ */
1.6 kristaps 2: /*
3: * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #ifdef HAVE_CONFIG_H
18: #include "config.h"
19: #endif
20:
21: #include <sys/param.h>
22: #include <sys/wait.h>
23:
1.1 kristaps 24: #include <assert.h>
1.6 kristaps 25: #include <ctype.h>
26: #include <errno.h>
1.1 kristaps 27: #include <fcntl.h>
1.6 kristaps 28: #include <limits.h>
1.1 kristaps 29: #include <regex.h>
30: #include <stdio.h>
31: #include <stdarg.h>
1.5 kristaps 32: #include <stdint.h>
1.1 kristaps 33: #include <stdlib.h>
34: #include <string.h>
1.6 kristaps 35: #include <unistd.h>
1.1 kristaps 36:
1.6 kristaps 37: #include "apropos_db.h"
1.4 schwarze 38: #include "mandoc.h"
1.8 ! kristaps 39: #include "mdoc.h"
! 40: #include "man.h"
! 41: #include "main.h"
1.6 kristaps 42: #include "manpath.h"
43:
44: #ifdef __linux__
45: # include <db_185.h>
46: #else
47: # include <db.h>
48: #endif
1.1 kristaps 49:
/*
 * Pages that this CGI program knows how to serve.
 * The order must match the pages[] name table.
 */
enum	page {
	PAGE_INDEX,	/* front page carrying the search form */
	PAGE_SEARCH,	/* search results */
	PAGE_SHOW,	/* a single formatted manual */
	PAGE__MAX	/* number of pages; also "not recognised" */
};
56:
/*
 * One decoded key-value pair from the request.
 * Both strings are heap copies released by kval_free().
 */
struct	kval {
	char		*key;	/* field name */
	char		*val;	/* field value */
};
61:
62: struct req {
1.6 kristaps 63: struct kval *fields;
1.1 kristaps 64: size_t fieldsz;
65: enum page page;
66: };
67:
/* Forward declarations of the file-local helpers, in alphabetical order. */
static	int		 atou(const char *buf, unsigned *v);
static	void		 format(const char *file);
static	void		 html_print(const char *p);
static	int		 kval_decode(char *p);
static	void		 kval_parse(struct kval **kv, size_t *kvsz, char *p);
static	void		 kval_free(struct kval *p, size_t sz);
static	void		 pg_index(const struct manpaths *ps,
				const struct req *req, char *path);
static	void		 pg_search(const struct manpaths *ps,
				const struct req *req, char *path);
static	void		 pg_show(const struct manpaths *ps,
				const struct req *req, char *path);
static	void		 resp_bad(void);
static	void		 resp_baddb(void);
static	void		 resp_badexpr(const struct req *req);
static	void		 resp_badmanual(void);
static	void		 resp_begin_html(int code, const char *msg);
static	void		 resp_begin_http(int code, const char *msg);
static	void		 resp_end_html(void);
static	void		 resp_index(const struct req *req);
static	void		 resp_search(struct res *r, size_t sz, void *arg);
static	void		 resp_searchform(const struct req *req);
90:
/* Request-wide settings, filled in from the environment by main(). */
static	const char	*progname;	/* SCRIPT_NAME, or "" if unset */
static	const char	*cache;		/* CACHE_DIR, or a built-in default */
static	const char	*host;		/* HTTP_HOST, or "localhost" */
1.1 kristaps 94:
95: static const char * const pages[PAGE__MAX] = {
96: "index", /* PAGE_INDEX */
97: "search", /* PAGE_SEARCH */
1.6 kristaps 98: "show", /* PAGE_SHOW */
1.1 kristaps 99: };
100:
/*
 * Convert a decimal string into an unsigned int.
 * This follows the checking sequence suggested for strtol(3) and is
 * used in place of strtonum(3) so the code stays portable.
 * Returns 1 and stores the value in *v on success; returns 0 and leaves
 * *v alone if the string is empty, has trailing characters, or the
 * value does not fit.
 */
static int
atou(const char *buf, unsigned *v)
{
	char	*end;
	long	 num;

	errno = 0;
	num = strtol(buf, &end, 10);

	/* Empty input or anything left over after the number. */
	if ('\0' == buf[0] || '\0' != *end)
		return(0);

	/* The value was clamped by strtol(3). */
	if (ERANGE == errno && (LONG_MAX == num || LONG_MIN == num))
		return(0);

	/* The value is outside of what an unsigned int can hold. */
	if (num > UINT_MAX || num < 0)
		return(0);

	*v = (unsigned int)num;
	return(1);
}
1.1 kristaps 123:
/*
 * Print a word to standard output, escaping the characters that are
 * special in HTML text and double-quoted attribute values:
 * '"', '&', '<' and '>' are written as character entity references.
 * A NULL pointer prints nothing.
 * This will pass non-ASCII straight to output: be warned!
 */
static void
html_print(const char *p)
{
	char	 c;

	if (NULL == p)
		return;

	while ('\0' != (c = *p++))
		switch (c) {
		case ('"'):
			/* The entity is "quot"; "quote" does not exist. */
			fputs("&quot;", stdout);
			break;
		case ('&'):
			fputs("&amp;", stdout);
			break;
		case ('>'):
			fputs("&gt;", stdout);
			break;
		case ('<'):
			fputs("&lt;", stdout);
			break;
		default:
			putchar((unsigned char)c);
			break;
		}
}
155:
156: static void
157: kval_free(struct kval *p, size_t sz)
158: {
159: int i;
160:
161: for (i = 0; i < (int)sz; i++) {
162: free(p[i].key);
163: free(p[i].val);
164: }
165: free(p);
166: }
167:
168: /*
169: * Parse out key-value pairs from an HTTP request variable.
1.6 kristaps 170: * This can be either a cookie or a POST/GET string, although man.cgi
171: * uses only GET for simplicity.
1.1 kristaps 172: */
173: static void
174: kval_parse(struct kval **kv, size_t *kvsz, char *p)
175: {
176: char *key, *val;
177: size_t sz, cur;
178:
179: cur = 0;
180:
181: while (p && '\0' != *p) {
182: while (' ' == *p)
183: p++;
184:
185: key = p;
186: val = NULL;
187:
188: if (NULL != (p = strchr(p, '='))) {
189: *p++ = '\0';
190: val = p;
191:
192: sz = strcspn(p, ";&");
193: /* LINTED */
194: p += sz;
195:
196: if ('\0' != *p)
197: *p++ = '\0';
198: } else {
199: p = key;
200: sz = strcspn(p, ";&");
201: /* LINTED */
202: p += sz;
203:
204: if ('\0' != *p)
205: p++;
206: continue;
207: }
208:
209: if ('\0' == *key || '\0' == *val)
210: continue;
211:
212: /* Just abort handling. */
213:
214: if ( ! kval_decode(key))
215: return;
216: if ( ! kval_decode(val))
217: return;
218:
219: if (*kvsz + 1 >= cur) {
220: cur++;
221: *kv = mandoc_realloc
222: (*kv, cur * sizeof(struct kval));
223: }
224:
225: (*kv)[(int)*kvsz].key = mandoc_strdup(key);
226: (*kv)[(int)*kvsz].val = mandoc_strdup(val);
227: (*kvsz)++;
228: }
229: }
230:
231: /*
1.6 kristaps 232: * HTTP-decode a string. The standard explanation is that this turns
233: * "%4e+foo" into "n foo" in the regular way. This is done in-place
234: * over the allocated string.
1.1 kristaps 235: */
236: static int
237: kval_decode(char *p)
238: {
239: char hex[3];
240: int c;
241:
242: hex[2] = '\0';
243:
244: for ( ; '\0' != *p; p++) {
245: if ('%' == *p) {
246: if ('\0' == (hex[0] = *(p + 1)))
247: return(0);
248: if ('\0' == (hex[1] = *(p + 2)))
249: return(0);
250: if (1 != sscanf(hex, "%x", &c))
251: return(0);
252: if ('\0' == c)
253: return(0);
254:
255: *p = (char)c;
256: memmove(p + 1, p + 3, strlen(p + 3) + 1);
257: } else
258: *p = '+' == *p ? ' ' : *p;
259: }
260:
261: *p = '\0';
262: return(1);
263: }
264:
/*
 * Write the HTTP response header block and flush it.
 * A "Status:" line carrying "code" and "msg" is written only if the
 * code is not 200, so "msg" may be NULL for a 200 response.
 * The header block is terminated by an empty line.
 */
static void
resp_begin_http(int code, const char *msg)
{

	if (200 != code)
		printf("Status: %d %s\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8" "\n", stdout);
	fputs("Cache-Control: no-cache" "\n", stdout);
	fputs("Pragma: no-cache" "\n", stdout);
	putchar('\n');

	fflush(stdout);
}
279:
/*
 * Begin an HTML response: write the HTTP header for "code" and "msg",
 * then the document prologue up to and including the opening of the
 * BODY element.  Close the document with resp_end_html().
 */
static void
resp_begin_html(int code, const char *msg)
{
	static const char * const prologue[] = {
		"<!DOCTYPE HTML PUBLIC ",
		" \"-//W3C//DTD HTML 4.01//EN\"",
		" \"http://www.w3.org/TR/html4/strict.dtd\">",
		"<HTML>",
		" <HEAD>",
		" <TITLE>System Manpage Reference</TITLE>",
		" </HEAD>",
		" <BODY>",
		"<!-- Begin page content. //-->"
	};
	size_t	 line;

	resp_begin_http(code, msg);

	/* One puts(3) per line, each of which appends the newline. */
	for (line = 0; line < sizeof(prologue) / sizeof(prologue[0]); line++)
		puts(prologue[line]);
}
296:
/*
 * Finish an HTML response by closing the BODY and HTML elements that
 * resp_begin_html() opened.
 */
static void
resp_end_html(void)
{

	fputs(" </BODY>\n</HTML>\n", stdout);
}
303:
304: static void
305: resp_searchform(const struct req *req)
306: {
307: int i;
308: const char *expr, *sec, *arch;
309:
310: expr = sec = arch = "";
311:
312: for (i = 0; i < (int)req->fieldsz; i++)
313: if (0 == strcmp(req->fields[i].key, "expr"))
314: expr = req->fields[i].val;
315: else if (0 == strcmp(req->fields[i].key, "sec"))
316: sec = req->fields[i].val;
317: else if (0 == strcmp(req->fields[i].key, "arch"))
318: arch = req->fields[i].val;
319:
320: puts("<!-- Begin search form. //-->");
321: printf("<FORM ACTION=\"");
322: html_print(progname);
323: printf("/search\" METHOD=\"get\">\n");
324: puts(" <FIELDSET>" "\n"
325: " <INPUT TYPE=\"submit\" VALUE=\"Search:\">");
326: printf(" Terms: <INPUT TYPE=\"text\" "
327: "SIZE=\"60\" NAME=\"expr\" VALUE=\"");
328: html_print(expr);
329: puts("\">");
330: printf(" Section: <INPUT TYPE=\"text\" "
331: "SIZE=\"4\" NAME=\"sec\" VALUE=\"");
332: html_print(sec);
333: puts("\">");
334: printf(" Arch: <INPUT TYPE=\"text\" "
335: "SIZE=\"8\" NAME=\"arch\" VALUE=\"");
336: html_print(arch);
337: puts("\">");
338: puts(" </FIELDSET>\n</FORM>\n<!-- End search form. //-->");
339: }
340:
/*
 * Respond with the index page: a complete 200 document containing
 * nothing but the search form.
 */
static void
resp_index(const struct req *req)
{

	/* Header and prologue, form, epilogue. */
	resp_begin_html(200, NULL);
	resp_searchform(req);
	resp_end_html();
}
349:
/*
 * Respond with a 404 page saying that the requested manual does not
 * exist.
 */
static void
resp_badmanual(void)
{

	resp_begin_html(404, "Not Found");
	fputs("<P>Requested manual not found.</P>\n", stdout);
	resp_end_html();
}
358:
/*
 * Respond to a search that could not be run.
 * This is a regular 200 page: the search form is shown again, filled
 * in from the request, followed by a note that the search failed.
 */
static void
resp_badexpr(const struct req *req)
{

	resp_begin_html(200, NULL);
	resp_searchform(req);
	fputs("<P>Your search didn't work.</P>\n", stdout);
	resp_end_html();
}
1.1 kristaps 368:
/*
 * Respond with a 500 page for failures that have no more specific
 * error page.
 */
static void
resp_bad(void)
{

	resp_begin_html(500, "Internal Server Error");
	fputs("<P>Generic badness happened.</P>\n", stdout);
	resp_end_html();
}
376:
/*
 * Respond with a 500 page saying that the database is broken.
 */
static void
resp_baddb(void)
{

	resp_begin_html(500, "Internal Server Error");
	fputs("<P>Your database is broken.</P>\n", stdout);
	resp_end_html();
}
385:
386: static void
1.6 kristaps 387: resp_search(struct res *r, size_t sz, void *arg)
1.1 kristaps 388: {
389: int i;
390:
1.6 kristaps 391: if (1 == sz) {
392: /*
393: * If we have just one result, then jump there now
394: * without any delay.
395: */
396: puts("Status: 303 See Other");
397: printf("Location: http://%s%s/show/%u/%u.html\n",
398: host, progname,
399: r[0].volume, r[0].rec);
400: puts("Content-Type: text/html; charset=utf-8\n");
401: return;
402: }
403:
404: resp_begin_html(200, NULL);
405: resp_searchform((const struct req *)arg);
406:
407: if (0 == sz)
408: puts("<P>No results found.</P>");
1.1 kristaps 409:
410: for (i = 0; i < (int)sz; i++) {
1.6 kristaps 411: printf("<P><A HREF=\"");
412: html_print(progname);
413: printf("/show/%u/%u.html\">", r[i].volume, r[i].rec);
414: html_print(r[i].title);
1.1 kristaps 415: putchar('(');
1.6 kristaps 416: html_print(r[i].cat);
417: if (r[i].arch && '\0' != *r[i].arch) {
418: putchar('/');
419: html_print(r[i].arch);
420: }
421: printf(")</A> ");
422: html_print(r[i].desc);
423: puts("</P>");
1.1 kristaps 424: }
1.6 kristaps 425:
426: resp_end_html();
427: }
428:
/*
 * Page handler for the index page.
 * The manual paths and the sub-path are not needed here; they are
 * accepted only so that all page handlers share one signature.
 */
/* ARGSUSED */
static void
pg_index(const struct manpaths *ps, const struct req *req, char *path)
{

	resp_index(req);
}
436:
437: static void
1.8 ! kristaps 438: format(const char *file)
1.7 kristaps 439: {
1.8 ! kristaps 440: struct mparse *mp;
! 441: int fd;
! 442: struct mdoc *mdoc;
! 443: struct man *man;
! 444: void *vp;
! 445: enum mandoclevel rc;
1.7 kristaps 446:
1.8 ! kristaps 447: if (-1 == (fd = open(file, O_RDONLY, 0))) {
! 448: resp_baddb();
1.7 kristaps 449: return;
450: }
451:
1.8 ! kristaps 452: mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
! 453: rc = mparse_readfd(mp, fd, file);
! 454: close(fd);
1.7 kristaps 455:
1.8 ! kristaps 456: if (rc >= MANDOCLEVEL_FATAL) {
1.7 kristaps 457: resp_baddb();
458: return;
459: }
460:
1.8 ! kristaps 461: mparse_result(mp, &mdoc, &man);
! 462: vp = html_alloc(NULL);
1.7 kristaps 463:
1.8 ! kristaps 464: if (NULL != mdoc) {
! 465: resp_begin_http(200, NULL);
! 466: html_mdoc(vp, mdoc);
! 467: } else if (NULL != man) {
! 468: resp_begin_http(200, NULL);
! 469: html_man(vp, man);
! 470: } else
! 471: resp_baddb();
1.7 kristaps 472:
1.8 ! kristaps 473: html_free(vp);
! 474: mparse_free(mp);
1.7 kristaps 475: }
476:
477: static void
1.6 kristaps 478: pg_show(const struct manpaths *ps, const struct req *req, char *path)
1.1 kristaps 479: {
1.6 kristaps 480: char *sub;
1.7 kristaps 481: char file[MAXPATHLEN];
1.6 kristaps 482: int rc;
483: unsigned int vol, rec;
484: DB *db;
485: DBT key, val;
486:
487: if (NULL == path) {
488: resp_badmanual();
489: return;
490: } else if (NULL == (sub = strrchr(path, '/'))) {
491: resp_badmanual();
492: return;
493: } else
494: *sub++ = '\0';
495:
496: if ( ! (atou(path, &vol) && atou(sub, &rec))) {
497: resp_badmanual();
498: return;
499: } else if (vol >= (unsigned int)ps->sz) {
500: resp_badmanual();
501: return;
502: }
503:
504: strlcpy(file, ps->paths[vol], MAXPATHLEN);
505: strlcat(file, "/mandoc.index", MAXPATHLEN);
506:
507: /* Open the index recno(3) database. */
508:
509: db = dbopen(file, O_RDONLY, 0, DB_RECNO, NULL);
510: if (NULL == db) {
511: resp_baddb();
512: return;
513: }
514:
515: key.data = &rec;
516: key.size = 4;
517:
518: if (0 != (rc = (*db->get)(db, &key, &val, 0))) {
519: rc < 0 ? resp_baddb() : resp_badmanual();
520: (*db->close)(db);
521: return;
522: }
523:
524: /* Extra filename: the first nil-terminated entry. */
525:
526: strlcpy(file, ps->paths[vol], MAXPATHLEN);
527: strlcat(file, "/", MAXPATHLEN);
528: strlcat(file, (char *)val.data, MAXPATHLEN);
529:
1.8 ! kristaps 530: (*db->close)(db);
! 531:
! 532: format(file);
1.6 kristaps 533: }
534:
535: static void
536: pg_search(const struct manpaths *ps, const struct req *req, char *path)
537: {
538: size_t tt;
539: int i, sz, rc;
540: const char *ep, *start;
541: char **cp;
542: struct opts opt;
543: struct expr *expr;
544:
545: expr = NULL;
546: cp = NULL;
547: ep = NULL;
548: sz = 0;
1.1 kristaps 549:
550: memset(&opt, 0, sizeof(struct opts));
1.6 kristaps 551:
552: for (sz = i = 0; i < (int)req->fieldsz; i++)
553: if (0 == strcmp(req->fields[i].key, "expr"))
554: ep = req->fields[i].val;
555: else if (0 == strcmp(req->fields[i].key, "sec"))
556: opt.cat = req->fields[i].val;
557: else if (0 == strcmp(req->fields[i].key, "arch"))
558: opt.arch = req->fields[i].val;
559:
560: /*
561: * Poor man's tokenisation.
562: * Just break apart by spaces.
563: * Yes, this is half-ass. But it works for now.
564: */
565:
566: while (ep && isspace((unsigned char)*ep))
567: ep++;
568:
569: while (ep && '\0' != *ep) {
570: cp = mandoc_realloc(cp, (sz + 1) * sizeof(char *));
571: start = ep;
572: while ('\0' != *ep && ! isspace((unsigned char)*ep))
573: ep++;
574: cp[sz] = mandoc_malloc((ep - start) + 1);
575: memcpy(cp[sz], start, ep - start);
576: cp[sz++][ep - start] = '\0';
577: while (isspace((unsigned char)*ep))
578: ep++;
579: }
580:
581: rc = -1;
582:
583: /*
584: * Pump down into apropos backend.
585: * The resp_search() function is called with the results.
586: */
587:
588: if (NULL != (expr = exprcomp(sz, cp, &tt)))
589: rc = apropos_search
590: (ps->sz, ps->paths, &opt,
591: expr, tt, (void *)req, resp_search);
592:
593: /* ...unless errors occured. */
594:
595: if (0 == rc)
596: resp_baddb();
597: else if (-1 == rc)
598: resp_badexpr(req);
599:
600: for (i = 0; i < sz; i++)
601: free(cp[i]);
602:
603: free(cp);
604: exprfree(expr);
1.1 kristaps 605: }
606:
607: int
608: main(void)
609: {
610: int i;
611: struct req req;
1.6 kristaps 612: char *p, *path, *subpath;
613: struct manpaths paths;
614:
615: /* HTTP init: read and parse the query string. */
616:
617: progname = getenv("SCRIPT_NAME");
618: if (NULL == progname)
619: progname = "";
620:
1.7 kristaps 621: cache = getenv("CACHE_DIR");
622: if (NULL == cache)
623: cache = "/cache/man.cgi";
624:
1.8 ! kristaps 625: if (-1 == chdir(cache)) {
! 626: resp_bad();
! 627: return(EXIT_FAILURE);
1.7 kristaps 628: }
629:
1.6 kristaps 630: host = getenv("HTTP_HOST");
631: if (NULL == host)
632: host = "localhost";
1.1 kristaps 633:
634: memset(&req, 0, sizeof(struct req));
635:
636: if (NULL != (p = getenv("QUERY_STRING")))
637: kval_parse(&req.fields, &req.fieldsz, p);
638:
1.6 kristaps 639: /* Resolve leading subpath component. */
1.1 kristaps 640:
1.6 kristaps 641: subpath = path = NULL;
1.1 kristaps 642: req.page = PAGE__MAX;
643:
644: if (NULL == (path = getenv("PATH_INFO")) || '\0' == *path)
645: req.page = PAGE_INDEX;
1.6 kristaps 646:
1.1 kristaps 647: if (NULL != path && '/' == *path && '\0' == *++path)
648: req.page = PAGE_INDEX;
649:
1.6 kristaps 650: /* Strip file suffix. */
651:
652: if (NULL != path && NULL != (p = strrchr(path, '.')))
653: if (NULL != p && NULL == strchr(p, '/'))
654: *p++ = '\0';
655:
656: /* Resolve subpath component. */
1.1 kristaps 657:
658: if (NULL != path && NULL != (subpath = strchr(path, '/')))
1.6 kristaps 659: *subpath++ = '\0';
1.1 kristaps 660:
1.6 kristaps 661: /* Map path into one we recognise. */
1.1 kristaps 662:
663: if (NULL != path && '\0' != *path)
664: for (i = 0; i < (int)PAGE__MAX; i++)
665: if (0 == strcmp(pages[i], path)) {
666: req.page = (enum page)i;
667: break;
668: }
669:
1.6 kristaps 670: /* Initialise MANPATH. */
671:
672: memset(&paths, 0, sizeof(struct manpaths));
1.8 ! kristaps 673: manpath_manconf("etc/man.conf", &paths);
1.6 kristaps 674:
675: /* Route pages. */
676:
1.1 kristaps 677: switch (req.page) {
678: case (PAGE_INDEX):
1.6 kristaps 679: pg_index(&paths, &req, subpath);
1.1 kristaps 680: break;
681: case (PAGE_SEARCH):
1.6 kristaps 682: pg_search(&paths, &req, subpath);
683: break;
684: case (PAGE_SHOW):
685: pg_show(&paths, &req, subpath);
1.1 kristaps 686: break;
687: default:
688: break;
689: }
690:
1.6 kristaps 691: manpath_free(&paths);
1.1 kristaps 692: kval_free(req.fields, req.fieldsz);
1.6 kristaps 693:
1.1 kristaps 694: return(EXIT_SUCCESS);
695: }
CVSweb