Annotation of mandoc/cgi.c, Revision 1.6
1.6 ! kristaps 1: /* $Id: cgi.c,v 1.5 2011/11/20 12:39:08 kristaps Exp $ */
! 2: /*
! 3: * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
! 4: *
! 5: * Permission to use, copy, modify, and distribute this software for any
! 6: * purpose with or without fee is hereby granted, provided that the above
! 7: * copyright notice and this permission notice appear in all copies.
! 8: *
! 9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
! 10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
! 11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
! 12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
! 13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
! 14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
! 15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
! 16: */
! 17: #ifdef HAVE_CONFIG_H
! 18: #include "config.h"
! 19: #endif
! 20:
! 21: #include <sys/param.h>
! 22: #include <sys/wait.h>
! 23:
1.1 kristaps 24: #include <assert.h>
1.6 ! kristaps 25: #include <ctype.h>
! 26: #include <errno.h>
1.1 kristaps 27: #include <fcntl.h>
1.6 ! kristaps 28: #include <limits.h>
1.1 kristaps 29: #include <regex.h>
30: #include <stdio.h>
31: #include <stdarg.h>
1.5 kristaps 32: #include <stdint.h>
1.1 kristaps 33: #include <stdlib.h>
34: #include <string.h>
1.6 ! kristaps 35: #include <unistd.h>
1.1 kristaps 36:
1.6 ! kristaps 37: #include "apropos_db.h"
1.4 schwarze 38: #include "mandoc.h"
1.6 ! kristaps 39: #include "manpath.h"
! 40:
! 41: #ifdef __linux__
! 42: # include <db_185.h>
! 43: #else
! 44: # include <db.h>
! 45: #endif
1.1 kristaps 46:
/*
 * Pages served by this CGI.
 * The values double as indices into the pages[] name table, and
 * PAGE__MAX is also used to mean "no page has been resolved".
 */
enum	page {
	PAGE_INDEX,	/* search form only */
	PAGE_SEARCH,	/* run a search and list the results */
	PAGE_SHOW,	/* display a single manual */
	PAGE__MAX
};
53:
/*
 * A single key-value pair taken from the request's query string.
 * Both members are heap-allocated strings released by kval_free().
 */
struct	kval {
	char		*key;
	char		*val;
};
58:
59: struct req {
1.6 ! kristaps 60: struct kval *fields;
1.1 kristaps 61: size_t fieldsz;
62: enum page page;
63: };
64:
/* Forward declarations of the file-local helpers, in alphabetical order. */
static	int		 atou(const char *buf, unsigned *v);
static	void		 html_print(const char *p);
static	int		 kval_decode(char *p);
static	void		 kval_parse(struct kval **kv, size_t *kvsz, char *p);
static	void		 kval_free(struct kval *p, size_t sz);
static	void		 pg_index(const struct manpaths *ps,
				const struct req *req, char *path);
static	void		 pg_search(const struct manpaths *ps,
				const struct req *req, char *path);
static	void		 pg_show(const struct manpaths *ps,
				const struct req *req, char *path);
static	void		 resp_baddb(void);
static	void		 resp_badexpr(const struct req *req);
static	void		 resp_badmanual(void);
static	void		 resp_begin_html(int code, const char *msg);
static	void		 resp_begin_http(int code, const char *msg);
static	void		 resp_end_html(void);
static	void		 resp_index(const struct req *req);
static	void		 resp_search(struct res *r, size_t sz, void *arg);
static	void		 resp_searchform(const struct req *req);

/* Set once in main() from SCRIPT_NAME and HTTP_HOST respectively. */
static	const char	 *progname;
static	const char	 *host;
1.1 kristaps 88:
89: static const char * const pages[PAGE__MAX] = {
90: "index", /* PAGE_INDEX */
91: "search", /* PAGE_SEARCH */
1.6 ! kristaps 92: "show", /* PAGE_SHOW */
1.1 kristaps 93: };
94:
/*
 * Convert the decimal string "buf" into an unsigned integer.
 * This is OpenBSD's strtol(3) suggestion, used instead of strtonum(3)
 * for portability's sake, with one addition: the string must begin
 * with a digit.  On its own, strtol(3) skips leading white-space and
 * accepts a sign, which would let strings such as " +7" or "-0"
 * through as valid unsigned numbers.
 * Returns 1 and stores the result in "v" on success.
 * Returns 0 and leaves "v" untouched if the string is empty, has
 * leading or trailing garbage, or does not fit into an unsigned int.
 */
static int
atou(const char *buf, unsigned *v)
{
	char	*ep;
	long	 lval;

	if ( ! isdigit((unsigned char)buf[0]))
		return(0);

	errno = 0;
	lval = strtol(buf, &ep, 10);
	if ('\0' != *ep)
		return(0);

	/* The cast is safe: lval is known to be non-negative here. */
	if (ERANGE == errno || lval < 0 ||
	    (unsigned long)lval > UINT_MAX)
		return(0);

	*v = (unsigned int)lval;
	return(1);
}
1.1 kristaps 117:
/*
 * Print a word to standard output, escaping HTML along the way:
 * the double quote, ampersand, greater-than and less-than characters
 * are written as their named character entities.
 * Everything else, non-ASCII included, is passed straight to output:
 * be warned!
 * A NULL word prints nothing.
 */
static void
html_print(const char *p)
{
	char	 c;

	if (NULL == p)
		return;

	while ('\0' != (c = *p++))
		switch (c) {
		case ('"'):
			fputs("&quot;", stdout);
			break;
		case ('&'):
			fputs("&amp;", stdout);
			break;
		case ('>'):
			fputs("&gt;", stdout);
			break;
		case ('<'):
			fputs("&lt;", stdout);
			break;
		default:
			putchar((unsigned char)c);
			break;
		}
}
149:
150: static void
151: kval_free(struct kval *p, size_t sz)
152: {
153: int i;
154:
155: for (i = 0; i < (int)sz; i++) {
156: free(p[i].key);
157: free(p[i].val);
158: }
159: free(p);
160: }
161:
162: /*
163: * Parse out key-value pairs from an HTTP request variable.
1.6 ! kristaps 164: * This can be either a cookie or a POST/GET string, although man.cgi
! 165: * uses only GET for simplicity.
1.1 kristaps 166: */
167: static void
168: kval_parse(struct kval **kv, size_t *kvsz, char *p)
169: {
170: char *key, *val;
171: size_t sz, cur;
172:
173: cur = 0;
174:
175: while (p && '\0' != *p) {
176: while (' ' == *p)
177: p++;
178:
179: key = p;
180: val = NULL;
181:
182: if (NULL != (p = strchr(p, '='))) {
183: *p++ = '\0';
184: val = p;
185:
186: sz = strcspn(p, ";&");
187: /* LINTED */
188: p += sz;
189:
190: if ('\0' != *p)
191: *p++ = '\0';
192: } else {
193: p = key;
194: sz = strcspn(p, ";&");
195: /* LINTED */
196: p += sz;
197:
198: if ('\0' != *p)
199: p++;
200: continue;
201: }
202:
203: if ('\0' == *key || '\0' == *val)
204: continue;
205:
206: /* Just abort handling. */
207:
208: if ( ! kval_decode(key))
209: return;
210: if ( ! kval_decode(val))
211: return;
212:
213: if (*kvsz + 1 >= cur) {
214: cur++;
215: *kv = mandoc_realloc
216: (*kv, cur * sizeof(struct kval));
217: }
218:
219: (*kv)[(int)*kvsz].key = mandoc_strdup(key);
220: (*kv)[(int)*kvsz].val = mandoc_strdup(val);
221: (*kvsz)++;
222: }
223: }
224:
225: /*
1.6 ! kristaps 226: * HTTP-decode a string. The standard explanation is that this turns
! 227: * "%4e+foo" into "n foo" in the regular way. This is done in-place
! 228: * over the allocated string.
1.1 kristaps 229: */
230: static int
231: kval_decode(char *p)
232: {
233: char hex[3];
234: int c;
235:
236: hex[2] = '\0';
237:
238: for ( ; '\0' != *p; p++) {
239: if ('%' == *p) {
240: if ('\0' == (hex[0] = *(p + 1)))
241: return(0);
242: if ('\0' == (hex[1] = *(p + 2)))
243: return(0);
244: if (1 != sscanf(hex, "%x", &c))
245: return(0);
246: if ('\0' == c)
247: return(0);
248:
249: *p = (char)c;
250: memmove(p + 1, p + 3, strlen(p + 3) + 1);
251: } else
252: *p = '+' == *p ? ' ' : *p;
253: }
254:
255: *p = '\0';
256: return(1);
257: }
258:
/*
 * Write the HTTP response header block, including the empty line that
 * terminates it, and flush it out.
 * A "Status" line made from "code" and "msg" is written only for
 * codes other than 200.
 */
static void
resp_begin_http(int code, const char *msg)
{

	if (200 != code)
		printf("Status: %d %s\n", code, msg);

	puts("Content-Type: text/html; charset=utf-8");
	puts("Cache-Control: no-cache");
	puts("Pragma: no-cache");
	putchar('\n');

	fflush(stdout);
}
! 273:
/*
 * Begin an HTML response: write the HTTP header for "code" and "msg",
 * then the document preamble up to and including the opening BODY tag.
 */
static void
resp_begin_html(int code, const char *msg)
{
	static const char * const preamble[] = {
		"<!DOCTYPE HTML PUBLIC ",
		" \"-//W3C//DTD HTML 4.01//EN\"",
		" \"http://www.w3.org/TR/html4/strict.dtd\">",
		"<HTML>",
		" <HEAD>",
		" <TITLE>System Manpage Reference</TITLE>",
		" </HEAD>",
		" <BODY>",
		"<!-- Begin page content. //-->"
	};
	size_t		 ln;

	resp_begin_http(code, msg);

	for (ln = 0; ln < sizeof(preamble) / sizeof(preamble[0]); ln++)
		puts(preamble[ln]);
}
! 290:
/*
 * Close the BODY and HTML elements opened by resp_begin_html().
 */
static void
resp_end_html(void)
{

	puts(" </BODY>");
	puts("</HTML>");
}
! 297:
! 298: static void
! 299: resp_searchform(const struct req *req)
! 300: {
! 301: int i;
! 302: const char *expr, *sec, *arch;
! 303:
! 304: expr = sec = arch = "";
! 305:
! 306: for (i = 0; i < (int)req->fieldsz; i++)
! 307: if (0 == strcmp(req->fields[i].key, "expr"))
! 308: expr = req->fields[i].val;
! 309: else if (0 == strcmp(req->fields[i].key, "sec"))
! 310: sec = req->fields[i].val;
! 311: else if (0 == strcmp(req->fields[i].key, "arch"))
! 312: arch = req->fields[i].val;
! 313:
! 314: puts("<!-- Begin search form. //-->");
! 315: printf("<FORM ACTION=\"");
! 316: html_print(progname);
! 317: printf("/search\" METHOD=\"get\">\n");
! 318: puts(" <FIELDSET>" "\n"
! 319: " <INPUT TYPE=\"submit\" VALUE=\"Search:\">");
! 320: printf(" Terms: <INPUT TYPE=\"text\" "
! 321: "SIZE=\"60\" NAME=\"expr\" VALUE=\"");
! 322: html_print(expr);
! 323: puts("\">");
! 324: printf(" Section: <INPUT TYPE=\"text\" "
! 325: "SIZE=\"4\" NAME=\"sec\" VALUE=\"");
! 326: html_print(sec);
! 327: puts("\">");
! 328: printf(" Arch: <INPUT TYPE=\"text\" "
! 329: "SIZE=\"8\" NAME=\"arch\" VALUE=\"");
! 330: html_print(arch);
! 331: puts("\">");
! 332: puts(" </FIELDSET>\n</FORM>\n<!-- End search form. //-->");
! 333: }
! 334:
/*
 * Respond with the index page: a complete HTML document containing
 * nothing but the search form.
 */
static void
resp_index(const struct req *req)
{

	resp_begin_html(200, NULL);
	resp_searchform(req);
	resp_end_html();
}
! 343:
/*
 * Respond with a 404 page saying that the requested manual does not
 * exist.
 */
static void
resp_badmanual(void)
{

	resp_begin_html(404, "Not Found");
	puts("<P>Requested manual not found.</P>");
	resp_end_html();
}
! 352:
/*
 * Respond to a search that could not be carried out: the search form,
 * pre-filled from the request, followed by a short notice.
 * The status stays at 200.
 */
static void
resp_badexpr(const struct req *req)
{

	resp_begin_html(200, NULL);
	resp_searchform(req);
	puts("<P>Your search didn't work.</P>");
	resp_end_html();
}
1.1 kristaps 362:
/*
 * Respond with a 500 page reporting a broken database.
 */
static void
resp_baddb(void)
{

	resp_begin_html(500, "Internal Server Error");
	puts("<P>Your database is broken.</P>");
	resp_end_html();
}
371:
372: static void
1.6 ! kristaps 373: resp_search(struct res *r, size_t sz, void *arg)
1.1 kristaps 374: {
375: int i;
376:
1.6 ! kristaps 377: if (1 == sz) {
! 378: /*
! 379: * If we have just one result, then jump there now
! 380: * without any delay.
! 381: */
! 382: puts("Status: 303 See Other");
! 383: printf("Location: http://%s%s/show/%u/%u.html\n",
! 384: host, progname,
! 385: r[0].volume, r[0].rec);
! 386: puts("Content-Type: text/html; charset=utf-8\n");
! 387: return;
! 388: }
! 389:
! 390: resp_begin_html(200, NULL);
! 391: resp_searchform((const struct req *)arg);
! 392:
! 393: if (0 == sz)
! 394: puts("<P>No results found.</P>");
1.1 kristaps 395:
396: for (i = 0; i < (int)sz; i++) {
1.6 ! kristaps 397: printf("<P><A HREF=\"");
! 398: html_print(progname);
! 399: printf("/show/%u/%u.html\">", r[i].volume, r[i].rec);
! 400: html_print(r[i].title);
1.1 kristaps 401: putchar('(');
1.6 ! kristaps 402: html_print(r[i].cat);
! 403: if (r[i].arch && '\0' != *r[i].arch) {
! 404: putchar('/');
! 405: html_print(r[i].arch);
! 406: }
! 407: printf(")</A> ");
! 408: html_print(r[i].desc);
! 409: puts("</P>");
1.1 kristaps 410: }
1.6 ! kristaps 411:
! 412: resp_end_html();
! 413: }
! 414:
/*
 * Handler for the index page.
 * Neither the manual paths nor the sub-path are used.
 */
/* ARGSUSED */
static void
pg_index(const struct manpaths *ps, const struct req *req, char *path)
{

	resp_index(req);
}
422:
423: static void
1.6 ! kristaps 424: pg_show(const struct manpaths *ps, const struct req *req, char *path)
1.1 kristaps 425: {
1.6 ! kristaps 426: pid_t pid;
! 427: char *sub;
! 428: char file[MAXPATHLEN], cmd[MAXPATHLEN];
! 429: int rc;
! 430: unsigned int vol, rec;
! 431: DB *db;
! 432: DBT key, val;
! 433:
! 434: if (NULL == path) {
! 435: resp_badmanual();
! 436: return;
! 437: } else if (NULL == (sub = strrchr(path, '/'))) {
! 438: resp_badmanual();
! 439: return;
! 440: } else
! 441: *sub++ = '\0';
! 442:
! 443: if ( ! (atou(path, &vol) && atou(sub, &rec))) {
! 444: resp_badmanual();
! 445: return;
! 446: } else if (vol >= (unsigned int)ps->sz) {
! 447: resp_badmanual();
! 448: return;
! 449: }
! 450:
! 451: strlcpy(file, ps->paths[vol], MAXPATHLEN);
! 452: strlcat(file, "/mandoc.index", MAXPATHLEN);
! 453:
! 454: /* Open the index recno(3) database. */
! 455:
! 456: db = dbopen(file, O_RDONLY, 0, DB_RECNO, NULL);
! 457: if (NULL == db) {
! 458: resp_baddb();
! 459: return;
! 460: }
! 461:
! 462: key.data = &rec;
! 463: key.size = 4;
! 464:
! 465: if (0 != (rc = (*db->get)(db, &key, &val, 0))) {
! 466: rc < 0 ? resp_baddb() : resp_badmanual();
! 467: (*db->close)(db);
! 468: return;
! 469: }
! 470:
! 471: /* Extra filename: the first nil-terminated entry. */
! 472:
! 473: strlcpy(file, ps->paths[vol], MAXPATHLEN);
! 474: strlcat(file, "/", MAXPATHLEN);
! 475: strlcat(file, (char *)val.data, MAXPATHLEN);
! 476:
! 477: (*db->close)(db);
! 478:
! 479: strlcpy(cmd, "man=", MAXPATHLEN);
! 480: strlcat(cmd, progname, MAXPATHLEN);
! 481: strlcat(cmd, "/search?expr=%N&sec=%S", MAXPATHLEN);
! 482:
! 483: /* Get ready to call the child mandoc(1) process. */
1.1 kristaps 484:
1.6 ! kristaps 485: if (-1 == (pid = fork()))
! 486: exit(EXIT_FAILURE);
1.1 kristaps 487:
1.6 ! kristaps 488: if (pid > 0) {
! 489: waitpid(pid, NULL, 0);
1.1 kristaps 490: return;
1.6 ! kristaps 491: }
! 492:
! 493: dup2(STDOUT_FILENO, STDERR_FILENO);
! 494:
! 495: puts("Content-Type: text/html; charset=utf-8\n");
! 496:
! 497: fflush(stdout);
! 498:
! 499: execlp("mandoc", "mandoc", "-T",
! 500: "html", "-O", cmd, file, (char *)NULL);
! 501: }
! 502:
! 503: static void
! 504: pg_search(const struct manpaths *ps, const struct req *req, char *path)
! 505: {
! 506: size_t tt;
! 507: int i, sz, rc;
! 508: const char *ep, *start;
! 509: char **cp;
! 510: struct opts opt;
! 511: struct expr *expr;
! 512:
! 513: expr = NULL;
! 514: cp = NULL;
! 515: ep = NULL;
! 516: sz = 0;
1.1 kristaps 517:
518: memset(&opt, 0, sizeof(struct opts));
1.6 ! kristaps 519:
! 520: for (sz = i = 0; i < (int)req->fieldsz; i++)
! 521: if (0 == strcmp(req->fields[i].key, "expr"))
! 522: ep = req->fields[i].val;
! 523: else if (0 == strcmp(req->fields[i].key, "sec"))
! 524: opt.cat = req->fields[i].val;
! 525: else if (0 == strcmp(req->fields[i].key, "arch"))
! 526: opt.arch = req->fields[i].val;
! 527:
! 528: /*
! 529: * Poor man's tokenisation.
! 530: * Just break apart by spaces.
! 531: * Yes, this is half-ass. But it works for now.
! 532: */
! 533:
! 534: while (ep && isspace((unsigned char)*ep))
! 535: ep++;
! 536:
! 537: while (ep && '\0' != *ep) {
! 538: cp = mandoc_realloc(cp, (sz + 1) * sizeof(char *));
! 539: start = ep;
! 540: while ('\0' != *ep && ! isspace((unsigned char)*ep))
! 541: ep++;
! 542: cp[sz] = mandoc_malloc((ep - start) + 1);
! 543: memcpy(cp[sz], start, ep - start);
! 544: cp[sz++][ep - start] = '\0';
! 545: while (isspace((unsigned char)*ep))
! 546: ep++;
! 547: }
! 548:
! 549: rc = -1;
! 550:
! 551: /*
! 552: * Pump down into apropos backend.
! 553: * The resp_search() function is called with the results.
! 554: */
! 555:
! 556: if (NULL != (expr = exprcomp(sz, cp, &tt)))
! 557: rc = apropos_search
! 558: (ps->sz, ps->paths, &opt,
! 559: expr, tt, (void *)req, resp_search);
! 560:
! 561: /* ...unless errors occured. */
! 562:
! 563: if (0 == rc)
! 564: resp_baddb();
! 565: else if (-1 == rc)
! 566: resp_badexpr(req);
! 567:
! 568: for (i = 0; i < sz; i++)
! 569: free(cp[i]);
! 570:
! 571: free(cp);
! 572: exprfree(expr);
1.1 kristaps 573: }
574:
575: int
576: main(void)
577: {
578: int i;
579: struct req req;
1.6 ! kristaps 580: char *p, *path, *subpath;
! 581: struct manpaths paths;
! 582:
! 583: /* HTTP init: read and parse the query string. */
! 584:
! 585: progname = getenv("SCRIPT_NAME");
! 586: if (NULL == progname)
! 587: progname = "";
! 588:
! 589: host = getenv("HTTP_HOST");
! 590: if (NULL == host)
! 591: host = "localhost";
1.1 kristaps 592:
593: memset(&req, 0, sizeof(struct req));
594:
595: if (NULL != (p = getenv("QUERY_STRING")))
596: kval_parse(&req.fields, &req.fieldsz, p);
597:
1.6 ! kristaps 598: /* Resolve leading subpath component. */
1.1 kristaps 599:
1.6 ! kristaps 600: subpath = path = NULL;
1.1 kristaps 601: req.page = PAGE__MAX;
602:
603: if (NULL == (path = getenv("PATH_INFO")) || '\0' == *path)
604: req.page = PAGE_INDEX;
1.6 ! kristaps 605:
1.1 kristaps 606: if (NULL != path && '/' == *path && '\0' == *++path)
607: req.page = PAGE_INDEX;
608:
1.6 ! kristaps 609: /* Strip file suffix. */
! 610:
! 611: if (NULL != path && NULL != (p = strrchr(path, '.')))
! 612: if (NULL != p && NULL == strchr(p, '/'))
! 613: *p++ = '\0';
! 614:
! 615: /* Resolve subpath component. */
1.1 kristaps 616:
617: if (NULL != path && NULL != (subpath = strchr(path, '/')))
1.6 ! kristaps 618: *subpath++ = '\0';
1.1 kristaps 619:
1.6 ! kristaps 620: /* Map path into one we recognise. */
1.1 kristaps 621:
622: if (NULL != path && '\0' != *path)
623: for (i = 0; i < (int)PAGE__MAX; i++)
624: if (0 == strcmp(pages[i], path)) {
625: req.page = (enum page)i;
626: break;
627: }
628:
1.6 ! kristaps 629: /* Initialise MANPATH. */
! 630:
! 631: memset(&paths, 0, sizeof(struct manpaths));
! 632: manpath_parse(&paths, NULL, NULL);
! 633:
! 634: /* Route pages. */
! 635:
1.1 kristaps 636: switch (req.page) {
637: case (PAGE_INDEX):
1.6 ! kristaps 638: pg_index(&paths, &req, subpath);
1.1 kristaps 639: break;
640: case (PAGE_SEARCH):
1.6 ! kristaps 641: pg_search(&paths, &req, subpath);
! 642: break;
! 643: case (PAGE_SHOW):
! 644: pg_show(&paths, &req, subpath);
1.1 kristaps 645: break;
646: default:
647: break;
648: }
649:
1.6 ! kristaps 650: manpath_free(&paths);
1.1 kristaps 651: kval_free(req.fields, req.fieldsz);
1.6 ! kristaps 652:
1.1 kristaps 653: return(EXIT_SUCCESS);
654: }
CVSweb