Annotation of mandoc/html.c, Revision 1.84
1.84 ! kristaps 1: /* $Id: html.c,v 1.83 2009/11/10 16:32:00 kristaps Exp $ */
1.1 kristaps 2: /*
1.29 kristaps 3: * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
1.1 kristaps 4: *
5: * Permission to use, copy, modify, and distribute this software for any
1.29 kristaps 6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
1.1 kristaps 8: *
1.29 kristaps 9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.1 kristaps 16: */
1.41 kristaps 17: #include <sys/types.h>
1.30 kristaps 18:
1.1 kristaps 19: #include <assert.h>
1.68 kristaps 20: #include <ctype.h>
1.76 kristaps 21: #include <stdarg.h>
1.29 kristaps 22: #include <stdio.h>
1.63 kristaps 23: #include <stdint.h>
1.1 kristaps 24: #include <stdlib.h>
1.33 kristaps 25: #include <string.h>
1.45 kristaps 26: #include <unistd.h>
1.1 kristaps 27:
1.58 kristaps 28: #include "out.h"
1.32 kristaps 29: #include "chars.h"
1.51 kristaps 30: #include "html.h"
1.64 kristaps 31: #include "main.h"
1.2 kristaps 32:
/*
 * Drop the const qualifier from a pointer by round-tripping it
 * through uintptr_t.
 */
#define	UNCONST(a)	((void *)(uintptr_t)(const void *)(a))

/* The two quoted strings printed inside the <!DOCTYPE ...> line. */
#define	DOCTYPE		"-//W3C//DTD HTML 4.01//EN"
#define	DTD		"http://www.w3.org/TR/html4/strict.dtd"
1.8 kristaps 37:
/*
 * Static description of one HTML element: its name as written in the
 * markup and a set of output flags.
 */
struct	htmldata {
	const char	 *name;		/* element name, e.g. "div" */
	int		  flags;	/* bitwise OR of the values below */
/* Element sits on its own line: no leading space, newline after close. */
#define	HTML_CLRLINE	 (1 << 0)
/* Element has no closing tag and is never pushed onto the tag stack. */
#define	HTML_NOSTACK	 (1 << 1)
};
1.7 kristaps 44:
1.29 kristaps 45: static const struct htmldata htmltags[TAG_MAX] = {
1.30 kristaps 46: {"html", HTML_CLRLINE}, /* TAG_HTML */
47: {"head", HTML_CLRLINE}, /* TAG_HEAD */
48: {"body", HTML_CLRLINE}, /* TAG_BODY */
49: {"meta", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_META */
1.33 kristaps 50: {"title", HTML_CLRLINE}, /* TAG_TITLE */
1.30 kristaps 51: {"div", HTML_CLRLINE}, /* TAG_DIV */
1.29 kristaps 52: {"h1", 0}, /* TAG_H1 */
53: {"h2", 0}, /* TAG_H2 */
1.30 kristaps 54: {"p", HTML_CLRLINE}, /* TAG_P */
1.29 kristaps 55: {"span", 0}, /* TAG_SPAN */
1.30 kristaps 56: {"link", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
57: {"br", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
58: {"a", 0}, /* TAG_A */
1.33 kristaps 59: {"table", HTML_CLRLINE}, /* TAG_TABLE */
60: {"col", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_COL */
61: {"tr", HTML_CLRLINE}, /* TAG_TR */
62: {"td", HTML_CLRLINE}, /* TAG_TD */
1.34 kristaps 63: {"li", HTML_CLRLINE}, /* TAG_LI */
64: {"ul", HTML_CLRLINE}, /* TAG_UL */
65: {"ol", HTML_CLRLINE}, /* TAG_OL */
1.41 kristaps 66: {"base", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_BASE */
1.29 kristaps 67: };
1.10 kristaps 68:
1.82 kristaps 69: static const char *const htmlattrs[ATTR_MAX] = {
1.29 kristaps 70: "http-equiv",
71: "content",
72: "name",
73: "rel",
74: "href",
75: "type",
76: "media",
1.33 kristaps 77: "class",
78: "style",
79: "width",
80: "valign",
1.54 kristaps 81: "target",
1.57 kristaps 82: "id",
1.67 kristaps 83: "summary",
1.29 kristaps 84: };
1.10 kristaps 85:
#ifdef __linux__
/*
 * Explicit prototype for getsubopt(3) on Linux.
 * NOTE(review): presumably needed because the system headers hide it
 * unless a feature-test macro is defined -- confirm.
 */
extern	int	  getsubopt(char **, char * const *, char **);
#endif
1.29 kristaps 89:
1.82 kristaps 90:
/* File-local output helpers, defined further down in this file. */
static	void		  print_ctag(struct html *, enum htmltag);
static	void		  print_encode(struct html *, const char *);
static	void		  print_res(struct html *, const char *, size_t);
static	void		  print_spec(struct html *, const char *, size_t);
95:
96:
1.29 kristaps 97: void *
1.43 kristaps 98: html_alloc(char *outopts)
1.10 kristaps 99: {
1.30 kristaps 100: struct html *h;
1.63 kristaps 101: const char *toks[4];
102: char *v;
1.43 kristaps 103:
104: toks[0] = "style";
1.53 kristaps 105: toks[1] = "man";
1.54 kristaps 106: toks[2] = "includes";
107: toks[3] = NULL;
1.30 kristaps 108:
1.72 kristaps 109: h = calloc(1, sizeof(struct html));
110: if (NULL == h) {
1.75 kristaps 111: perror(NULL);
1.72 kristaps 112: exit(EXIT_FAILURE);
113: }
1.10 kristaps 114:
1.66 kristaps 115: h->tags.head = NULL;
116: h->ords.head = NULL;
1.72 kristaps 117: h->symtab = chars_init(CHARS_HTML);
1.41 kristaps 118:
1.47 kristaps 119: while (outopts && *outopts)
1.63 kristaps 120: switch (getsubopt(&outopts, UNCONST(toks), &v)) {
1.43 kristaps 121: case (0):
122: h->style = v;
123: break;
124: case (1):
1.53 kristaps 125: h->base_man = v;
1.43 kristaps 126: break;
1.54 kristaps 127: case (2):
128: h->base_includes = v;
129: break;
1.43 kristaps 130: default:
131: break;
132: }
133:
1.30 kristaps 134: return(h);
1.29 kristaps 135: }
1.10 kristaps 136:
1.33 kristaps 137:
1.29 kristaps 138: void
139: html_free(void *p)
140: {
1.30 kristaps 141: struct tag *tag;
1.37 kristaps 142: struct ord *ord;
1.30 kristaps 143: struct html *h;
144:
145: h = (struct html *)p;
1.10 kristaps 146:
1.66 kristaps 147: while ((ord = h->ords.head) != NULL) {
148: h->ords.head = ord->next;
1.37 kristaps 149: free(ord);
150: }
151:
1.66 kristaps 152: while ((tag = h->tags.head) != NULL) {
153: h->tags.head = tag->next;
1.30 kristaps 154: free(tag);
155: }
1.36 kristaps 156:
157: if (h->symtab)
158: chars_free(h->symtab);
1.53 kristaps 159:
1.30 kristaps 160: free(h);
1.10 kristaps 161: }
1.2 kristaps 162:
1.33 kristaps 163:
1.51 kristaps 164: void
1.29 kristaps 165: print_gen_head(struct html *h)
166: {
1.41 kristaps 167: struct htmlpair tag[4];
168:
169: tag[0].key = ATTR_HTTPEQUIV;
170: tag[0].val = "Content-Type";
171: tag[1].key = ATTR_CONTENT;
172: tag[1].val = "text/html; charset=utf-8";
173: print_otag(h, TAG_META, 2, tag);
174:
175: tag[0].key = ATTR_NAME;
176: tag[0].val = "resource-type";
177: tag[1].key = ATTR_CONTENT;
178: tag[1].val = "document";
179: print_otag(h, TAG_META, 2, tag);
180:
181: if (h->style) {
182: tag[0].key = ATTR_REL;
183: tag[0].val = "stylesheet";
184: tag[1].key = ATTR_HREF;
185: tag[1].val = h->style;
186: tag[2].key = ATTR_TYPE;
187: tag[2].val = "text/css";
188: tag[3].key = ATTR_MEDIA;
189: tag[3].val = "all";
190: print_otag(h, TAG_LINK, 4, tag);
191: }
1.4 kristaps 192: }
193:
1.33 kristaps 194:
1.29 kristaps 195: static void
1.83 kristaps 196: print_spec(struct html *h, const char *p, size_t len)
1.32 kristaps 197: {
198: const char *rhs;
199: size_t sz;
200:
1.83 kristaps 201: rhs = chars_a2ascii(h->symtab, p, len, &sz);
1.32 kristaps 202:
203: if (NULL == rhs)
204: return;
1.76 kristaps 205: fwrite(rhs, 1, sz, stdout);
1.32 kristaps 206: }
207:
1.33 kristaps 208:
1.32 kristaps 209: static void
1.83 kristaps 210: print_res(struct html *h, const char *p, size_t len)
1.32 kristaps 211: {
212: const char *rhs;
213: size_t sz;
214:
1.83 kristaps 215: rhs = chars_a2res(h->symtab, p, len, &sz);
1.32 kristaps 216:
217: if (NULL == rhs)
218: return;
1.76 kristaps 219: fwrite(rhs, 1, sz, stdout);
1.32 kristaps 220: }
221:
1.33 kristaps 222:
1.32 kristaps 223: static void
224: print_encode(struct html *h, const char *p)
1.29 kristaps 225: {
1.77 kristaps 226: size_t sz;
1.82 kristaps 227: int len;
228: const char *seq;
229: enum roffdeco deco;
1.14 kristaps 230:
1.32 kristaps 231: for (; *p; p++) {
1.77 kristaps 232: sz = strcspn(p, "\\<>&");
233:
234: fwrite(p, 1, sz, stdout);
1.80 kristaps 235: p += /* LINTED */
236: sz;
1.77 kristaps 237:
1.82 kristaps 238: if ('<' == *p) {
239: printf("<");
240: continue;
241: } else if ('>' == *p) {
242: printf(">");
243: continue;
244: } else if ('&' == *p) {
245: printf("&");
1.34 kristaps 246: continue;
1.77 kristaps 247: } else if ('\0' == *p)
248: break;
249:
1.82 kristaps 250: seq = ++p;
251: len = a2roffdeco(&deco, &seq, &sz);
252:
253: switch (deco) {
254: case (DECO_RESERVED):
255: print_res(h, seq, sz);
256: break;
257: case (DECO_SPECIAL):
258: print_spec(h, seq, sz);
259: break;
260: default:
261: break;
262: }
263:
264: p += len - 1;
1.84 ! kristaps 265:
! 266: if (DECO_NOSPACE == deco && '\0' == *(p + 1))
! 267: h->flags |= HTML_NOSPACE;
1.32 kristaps 268: }
1.14 kristaps 269: }
270:
271:
1.51 kristaps 272: struct tag *
1.29 kristaps 273: print_otag(struct html *h, enum htmltag tag,
274: int sz, const struct htmlpair *p)
1.14 kristaps 275: {
1.29 kristaps 276: int i;
1.30 kristaps 277: struct tag *t;
278:
279: if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
1.72 kristaps 280: t = malloc(sizeof(struct tag));
281: if (NULL == t) {
1.75 kristaps 282: perror(NULL);
1.72 kristaps 283: exit(EXIT_FAILURE);
284: }
1.30 kristaps 285: t->tag = tag;
1.66 kristaps 286: t->next = h->tags.head;
287: h->tags.head = t;
1.30 kristaps 288: } else
289: t = NULL;
1.29 kristaps 290:
291: if ( ! (HTML_NOSPACE & h->flags))
1.30 kristaps 292: if ( ! (HTML_CLRLINE & htmltags[tag].flags))
1.78 kristaps 293: putchar(' ');
1.29 kristaps 294:
295: printf("<%s", htmltags[tag].name);
296: for (i = 0; i < sz; i++) {
297: printf(" %s=\"", htmlattrs[p[i].key]);
298: assert(p->val);
1.32 kristaps 299: print_encode(h, p[i].val);
1.78 kristaps 300: putchar('\"');
1.29 kristaps 301: }
1.78 kristaps 302: putchar('>');
1.14 kristaps 303:
1.29 kristaps 304: h->flags |= HTML_NOSPACE;
1.30 kristaps 305: if (HTML_CLRLINE & htmltags[tag].flags)
306: h->flags |= HTML_NEWLINE;
307: else
308: h->flags &= ~HTML_NEWLINE;
1.14 kristaps 309:
1.30 kristaps 310: return(t);
1.14 kristaps 311: }
312:
313:
314: /* ARGSUSED */
1.29 kristaps 315: static void
316: print_ctag(struct html *h, enum htmltag tag)
1.14 kristaps 317: {
318:
1.29 kristaps 319: printf("</%s>", htmltags[tag].name);
1.71 kristaps 320: if (HTML_CLRLINE & htmltags[tag].flags) {
1.29 kristaps 321: h->flags |= HTML_NOSPACE;
1.30 kristaps 322: h->flags |= HTML_NEWLINE;
1.78 kristaps 323: putchar('\n');
1.71 kristaps 324: } else
1.30 kristaps 325: h->flags &= ~HTML_NEWLINE;
1.14 kristaps 326: }
327:
328:
1.29 kristaps 329: /* ARGSUSED */
1.51 kristaps 330: void
1.29 kristaps 331: print_gen_doctype(struct html *h)
1.1 kristaps 332: {
1.29 kristaps 333:
1.46 kristaps 334: printf("<!DOCTYPE HTML PUBLIC \"%s\" \"%s\">", DOCTYPE, DTD);
1.1 kristaps 335: }
336:
337:
1.51 kristaps 338: void
1.29 kristaps 339: print_text(struct html *h, const char *p)
1.1 kristaps 340: {
341:
1.29 kristaps 342: if (*p && 0 == *(p + 1))
343: switch (*p) {
344: case('.'):
345: /* FALLTHROUGH */
346: case(','):
347: /* FALLTHROUGH */
348: case(';'):
349: /* FALLTHROUGH */
350: case(':'):
351: /* FALLTHROUGH */
352: case('?'):
353: /* FALLTHROUGH */
354: case('!'):
355: /* FALLTHROUGH */
356: case(')'):
357: /* FALLTHROUGH */
358: case(']'):
359: /* FALLTHROUGH */
360: case('}'):
1.52 kristaps 361: if ( ! (HTML_IGNDELIM & h->flags))
362: h->flags |= HTML_NOSPACE;
1.30 kristaps 363: break;
1.29 kristaps 364: default:
365: break;
366: }
1.1 kristaps 367:
1.29 kristaps 368: if ( ! (h->flags & HTML_NOSPACE))
1.78 kristaps 369: putchar(' ');
1.30 kristaps 370:
1.29 kristaps 371: h->flags &= ~HTML_NOSPACE;
1.30 kristaps 372: h->flags &= ~HTML_NEWLINE;
1.1 kristaps 373:
1.29 kristaps 374: if (p)
1.32 kristaps 375: print_encode(h, p);
1.8 kristaps 376:
1.29 kristaps 377: if (*p && 0 == *(p + 1))
378: switch (*p) {
379: case('('):
380: /* FALLTHROUGH */
381: case('['):
382: /* FALLTHROUGH */
383: case('{'):
384: h->flags |= HTML_NOSPACE;
1.30 kristaps 385: break;
1.29 kristaps 386: default:
387: break;
388: }
1.1 kristaps 389: }
1.30 kristaps 390:
391:
1.51 kristaps 392: void
1.30 kristaps 393: print_tagq(struct html *h, const struct tag *until)
394: {
395: struct tag *tag;
396:
1.66 kristaps 397: while ((tag = h->tags.head) != NULL) {
1.30 kristaps 398: print_ctag(h, tag->tag);
1.66 kristaps 399: h->tags.head = tag->next;
1.30 kristaps 400: free(tag);
401: if (until && tag == until)
402: return;
403: }
404: }
405:
406:
1.51 kristaps 407: void
1.30 kristaps 408: print_stagq(struct html *h, const struct tag *suntil)
409: {
410: struct tag *tag;
411:
1.66 kristaps 412: while ((tag = h->tags.head) != NULL) {
1.30 kristaps 413: if (suntil && tag == suntil)
414: return;
415: print_ctag(h, tag->tag);
1.66 kristaps 416: h->tags.head = tag->next;
1.30 kristaps 417: free(tag);
418: }
419: }
1.55 kristaps 420:
421:
422: void
423: bufinit(struct html *h)
424: {
425:
426: h->buf[0] = '\0';
427: h->buflen = 0;
428: }
429:
430:
/*
 * Append "key:val;" to the scratch buffer.
 */
void
bufcat_style(struct html *h, const char *key, const char *val)
{
	static const char	 colon[] = ":";
	static const char	 semi[] = ";";

	bufcat(h, key);
	bufncat(h, colon, 1);
	bufcat(h, val);
	bufncat(h, semi, 1);
}
440:
441:
/*
 * Append the whole NUL-terminated string p to the scratch buffer.
 */
void
bufcat(struct html *h, const char *p)
{
	size_t		 len;

	len = strlen(p);
	bufncat(h, p, len);
}
448:
449:
450: void
451: buffmt(struct html *h, const char *fmt, ...)
452: {
453: va_list ap;
454:
455: va_start(ap, fmt);
1.56 kristaps 456: (void)vsnprintf(h->buf + (int)h->buflen,
1.55 kristaps 457: BUFSIZ - h->buflen - 1, fmt, ap);
458: va_end(ap);
459: h->buflen = strlen(h->buf);
460: }
461:
462:
463: void
464: bufncat(struct html *h, const char *p, size_t sz)
465: {
466:
467: if (h->buflen + sz > BUFSIZ - 1)
468: sz = BUFSIZ - 1 - h->buflen;
469:
470: (void)strncat(h->buf, p, sz);
471: h->buflen += sz;
472: }
473:
474:
475: void
476: buffmt_includes(struct html *h, const char *name)
477: {
478: const char *p, *pp;
479:
480: pp = h->base_includes;
1.61 kristaps 481:
482: while (NULL != (p = strchr(pp, '%'))) {
1.56 kristaps 483: bufncat(h, pp, (size_t)(p - pp));
1.55 kristaps 484: switch (*(p + 1)) {
485: case('I'):
486: bufcat(h, name);
487: break;
488: default:
489: bufncat(h, p, 2);
490: break;
491: }
492: pp = p + 2;
493: }
494: if (pp)
495: bufcat(h, pp);
496: }
497:
498:
499: void
500: buffmt_man(struct html *h,
501: const char *name, const char *sec)
502: {
503: const char *p, *pp;
504:
505: pp = h->base_man;
1.61 kristaps 506:
507: /* LINTED */
508: while (NULL != (p = strchr(pp, '%'))) {
1.56 kristaps 509: bufncat(h, pp, (size_t)(p - pp));
1.55 kristaps 510: switch (*(p + 1)) {
511: case('S'):
1.58 kristaps 512: bufcat(h, sec ? sec : "1");
1.55 kristaps 513: break;
514: case('N'):
1.58 kristaps 515: buffmt(h, name);
1.55 kristaps 516: break;
517: default:
518: bufncat(h, p, 2);
519: break;
520: }
521: pp = p + 2;
522: }
523: if (pp)
524: bufcat(h, pp);
525: }
1.58 kristaps 526:
527:
528: void
529: bufcat_su(struct html *h, const char *p, const struct roffsu *su)
530: {
1.62 kristaps 531: double v;
1.63 kristaps 532: const char *u;
1.58 kristaps 533:
534: v = su->scale;
535:
536: switch (su->unit) {
537: case (SCALE_CM):
538: u = "cm";
539: break;
540: case (SCALE_IN):
541: u = "in";
542: break;
543: case (SCALE_PC):
544: u = "pc";
545: break;
546: case (SCALE_PT):
547: u = "pt";
548: break;
1.59 kristaps 549: case (SCALE_EM):
550: u = "em";
551: break;
1.58 kristaps 552: case (SCALE_MM):
553: if (0 == (v /= 100))
554: v = 1;
555: u = "em";
556: break;
1.59 kristaps 557: case (SCALE_EN):
558: u = "ex";
559: break;
560: case (SCALE_BU):
561: u = "ex";
562: break;
1.58 kristaps 563: case (SCALE_VS):
564: u = "em";
565: break;
566: default:
567: u = "ex";
568: break;
569: }
570:
1.62 kristaps 571: if (su->pt)
572: buffmt(h, "%s: %f%s;", p, v, u);
573: else
574: /* LINTED */
575: buffmt(h, "%s: %d%s;", p, (int)v, u);
1.58 kristaps 576: }
1.65 kristaps 577:
1.68 kristaps 578:
/*
 * Append an identifier-safe encoding of src to the NUL-terminated
 * string in dst.
 *
 * dst: destination buffer holding a (possibly empty) string.
 * src: bytes to encode.
 * sz:  total size of dst in bytes; more than two bytes must remain
 *      free after the existing contents.
 *
 * The appended text is the letter 'x' (so the result never starts with
 * a digit) followed by two lower-case hexadecimal digits per source
 * byte.  Output stops silently when the buffer is full.
 * Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>.
 */
void
html_idcat(char *dst, const char *src, int sz)
{
	int		 ssz;

	assert(sz);

	for ( ; *dst != '\0' && sz; dst++, sz--)
		/* Jump to end. */ ;

	assert(sz > 2);

	/* We can't start with a number (bah). */

	*dst++ = 'x';
	*dst = '\0';
	sz--;

	for ( ; *src != '\0' && sz > 1; src++) {
		/*
		 * Go through unsigned char: a negative plain char
		 * would be sign-extended and print as "ffffffXX".
		 */
		ssz = snprintf(dst, (size_t)sz, "%.2x",
		    (unsigned int)(unsigned char)*src);
		if (ssz < 0)
			break;
		sz -= ssz;
		dst += ssz;
	}
}
CVSweb