Annotation of mandoc/html.c, Revision 1.90
/*	$Id: html.c,v 1.89 2009/11/15 06:45:31 kristaps Exp $ */
/*
 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
1.41 kristaps 17: #include <sys/types.h>
1.30 kristaps 18:
1.1 kristaps 19: #include <assert.h>
1.68 kristaps 20: #include <ctype.h>
1.76 kristaps 21: #include <stdarg.h>
1.29 kristaps 22: #include <stdio.h>
1.63 kristaps 23: #include <stdint.h>
1.1 kristaps 24: #include <stdlib.h>
1.33 kristaps 25: #include <string.h>
1.45 kristaps 26: #include <unistd.h>
1.1 kristaps 27:
1.58 kristaps 28: #include "out.h"
1.32 kristaps 29: #include "chars.h"
1.51 kristaps 30: #include "html.h"
1.64 kristaps 31: #include "main.h"
1.2 kristaps 32:
/*
 * Drop the const qualifier from a pointer by passing it through
 * uintptr_t; the result is a plain "void *" to the same address.
 */
#define	UNCONST(a)	((void *)(uintptr_t)(const void *)(a))

/* The two quoted strings emitted inside the <!DOCTYPE ...> line. */
#define	DOCTYPE		"-//W3C//DTD HTML 4.01//EN"
#define	DTD		"http://www.w3.org/TR/html4/strict.dtd"
1.8 kristaps 37:
/*
 * Flag bits for htmldata.flags:
 *   HTML_CLRLINE  no separating space is printed before the opening
 *                 tag, and a newline follows the closing tag.
 *   HTML_NOSTACK  the element is never pushed onto the open-tag stack
 *                 (it has no closing tag to emit later).
 */
#define	HTML_CLRLINE	 (1 << 0)
#define	HTML_NOSTACK	 (1 << 1)

/* Static description of one HTML element. */
struct	htmldata {
	const char	 *name;		/* element name as printed */
	int		  flags;	/* bitwise OR of HTML_* above */
};
1.7 kristaps 44:
1.29 kristaps 45: static const struct htmldata htmltags[TAG_MAX] = {
1.30 kristaps 46: {"html", HTML_CLRLINE}, /* TAG_HTML */
47: {"head", HTML_CLRLINE}, /* TAG_HEAD */
48: {"body", HTML_CLRLINE}, /* TAG_BODY */
49: {"meta", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_META */
1.33 kristaps 50: {"title", HTML_CLRLINE}, /* TAG_TITLE */
1.30 kristaps 51: {"div", HTML_CLRLINE}, /* TAG_DIV */
1.29 kristaps 52: {"h1", 0}, /* TAG_H1 */
53: {"h2", 0}, /* TAG_H2 */
1.30 kristaps 54: {"p", HTML_CLRLINE}, /* TAG_P */
1.29 kristaps 55: {"span", 0}, /* TAG_SPAN */
1.30 kristaps 56: {"link", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
57: {"br", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
58: {"a", 0}, /* TAG_A */
1.33 kristaps 59: {"table", HTML_CLRLINE}, /* TAG_TABLE */
60: {"col", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_COL */
61: {"tr", HTML_CLRLINE}, /* TAG_TR */
62: {"td", HTML_CLRLINE}, /* TAG_TD */
1.34 kristaps 63: {"li", HTML_CLRLINE}, /* TAG_LI */
64: {"ul", HTML_CLRLINE}, /* TAG_UL */
65: {"ol", HTML_CLRLINE}, /* TAG_OL */
1.41 kristaps 66: {"base", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_BASE */
1.29 kristaps 67: };
1.10 kristaps 68:
1.90 ! kristaps 69: static const char *const htmlfonts[HTMLFONT_MAX] = {
! 70: "roman",
! 71: "bold",
! 72: "italic"
! 73: };
! 74:
1.82 kristaps 75: static const char *const htmlattrs[ATTR_MAX] = {
1.29 kristaps 76: "http-equiv",
77: "content",
78: "name",
79: "rel",
80: "href",
81: "type",
82: "media",
1.33 kristaps 83: "class",
84: "style",
85: "width",
86: "valign",
1.54 kristaps 87: "target",
1.57 kristaps 88: "id",
1.67 kristaps 89: "summary",
1.29 kristaps 90: };
1.10 kristaps 91:
/*
 * On Linux the prototype is supplied here rather than relying on the
 * system headers to expose it.
 */
#ifdef __linux__
extern	int	  getsubopt(char **optionp,
			char * const *tokens, char **valuep);
#endif
1.29 kristaps 95:
1.82 kristaps 96:
/* Forward declarations of the file-local output helpers. */
static	void	  print_spec(struct html *h, const char *p, size_t len);
static	void	  print_res(struct html *h, const char *p, size_t len);
static	void	  print_ctag(struct html *h, enum htmltag tag);
static	int	  print_encode(struct html *h, const char *p,
			int norecurse);
static	void	  print_metaf(struct html *h, enum roffdeco deco);
1.82 kristaps 102:
103:
1.29 kristaps 104: void *
1.43 kristaps 105: html_alloc(char *outopts)
1.10 kristaps 106: {
1.30 kristaps 107: struct html *h;
1.63 kristaps 108: const char *toks[4];
109: char *v;
1.43 kristaps 110:
111: toks[0] = "style";
1.53 kristaps 112: toks[1] = "man";
1.54 kristaps 113: toks[2] = "includes";
114: toks[3] = NULL;
1.30 kristaps 115:
1.72 kristaps 116: h = calloc(1, sizeof(struct html));
117: if (NULL == h) {
1.75 kristaps 118: perror(NULL);
1.72 kristaps 119: exit(EXIT_FAILURE);
120: }
1.10 kristaps 121:
1.66 kristaps 122: h->tags.head = NULL;
123: h->ords.head = NULL;
1.72 kristaps 124: h->symtab = chars_init(CHARS_HTML);
1.41 kristaps 125:
1.47 kristaps 126: while (outopts && *outopts)
1.63 kristaps 127: switch (getsubopt(&outopts, UNCONST(toks), &v)) {
1.43 kristaps 128: case (0):
129: h->style = v;
130: break;
131: case (1):
1.53 kristaps 132: h->base_man = v;
1.43 kristaps 133: break;
1.54 kristaps 134: case (2):
135: h->base_includes = v;
136: break;
1.43 kristaps 137: default:
138: break;
139: }
140:
1.30 kristaps 141: return(h);
1.29 kristaps 142: }
1.10 kristaps 143:
1.33 kristaps 144:
1.29 kristaps 145: void
146: html_free(void *p)
147: {
1.30 kristaps 148: struct tag *tag;
1.37 kristaps 149: struct ord *ord;
1.30 kristaps 150: struct html *h;
151:
152: h = (struct html *)p;
1.10 kristaps 153:
1.66 kristaps 154: while ((ord = h->ords.head) != NULL) {
155: h->ords.head = ord->next;
1.37 kristaps 156: free(ord);
157: }
158:
1.66 kristaps 159: while ((tag = h->tags.head) != NULL) {
160: h->tags.head = tag->next;
1.30 kristaps 161: free(tag);
162: }
1.36 kristaps 163:
164: if (h->symtab)
165: chars_free(h->symtab);
1.53 kristaps 166:
1.30 kristaps 167: free(h);
1.10 kristaps 168: }
1.2 kristaps 169:
1.33 kristaps 170:
1.51 kristaps 171: void
1.29 kristaps 172: print_gen_head(struct html *h)
173: {
1.41 kristaps 174: struct htmlpair tag[4];
175:
176: tag[0].key = ATTR_HTTPEQUIV;
177: tag[0].val = "Content-Type";
178: tag[1].key = ATTR_CONTENT;
179: tag[1].val = "text/html; charset=utf-8";
180: print_otag(h, TAG_META, 2, tag);
181:
182: tag[0].key = ATTR_NAME;
183: tag[0].val = "resource-type";
184: tag[1].key = ATTR_CONTENT;
185: tag[1].val = "document";
186: print_otag(h, TAG_META, 2, tag);
187:
188: if (h->style) {
189: tag[0].key = ATTR_REL;
190: tag[0].val = "stylesheet";
191: tag[1].key = ATTR_HREF;
192: tag[1].val = h->style;
193: tag[2].key = ATTR_TYPE;
194: tag[2].val = "text/css";
195: tag[3].key = ATTR_MEDIA;
196: tag[3].val = "all";
197: print_otag(h, TAG_LINK, 4, tag);
198: }
1.4 kristaps 199: }
200:
1.33 kristaps 201:
1.29 kristaps 202: static void
1.83 kristaps 203: print_spec(struct html *h, const char *p, size_t len)
1.32 kristaps 204: {
205: const char *rhs;
206: size_t sz;
207:
1.83 kristaps 208: rhs = chars_a2ascii(h->symtab, p, len, &sz);
1.32 kristaps 209:
210: if (NULL == rhs)
211: return;
1.76 kristaps 212: fwrite(rhs, 1, sz, stdout);
1.32 kristaps 213: }
214:
1.33 kristaps 215:
1.32 kristaps 216: static void
1.83 kristaps 217: print_res(struct html *h, const char *p, size_t len)
1.32 kristaps 218: {
219: const char *rhs;
220: size_t sz;
221:
1.83 kristaps 222: rhs = chars_a2res(h->symtab, p, len, &sz);
1.32 kristaps 223:
224: if (NULL == rhs)
225: return;
1.76 kristaps 226: fwrite(rhs, 1, sz, stdout);
1.32 kristaps 227: }
228:
1.33 kristaps 229:
1.90 ! kristaps 230: struct tag *
! 231: print_ofont(struct html *h, enum htmlfont font)
! 232: {
! 233: struct htmlpair tag;
! 234:
! 235: h->metal = h->metac;
! 236: h->metac = font;
! 237:
! 238: /* FIXME: DECO_ROMAN should just close out preexisting. */
! 239:
! 240: if (h->metaf)
! 241: print_tagq(h, h->metaf);
! 242:
! 243: PAIR_CLASS_INIT(&tag, htmlfonts[font]);
! 244: h->metaf = print_otag(h, TAG_SPAN, 1, &tag);
! 245: return(h->metaf);
! 246: }
! 247:
! 248:
1.88 kristaps 249: static void
250: print_metaf(struct html *h, enum roffdeco deco)
251: {
1.90 ! kristaps 252: enum htmlfont font;
1.88 kristaps 253:
254: switch (deco) {
1.90 ! kristaps 255: case (DECO_PREVIOUS):
! 256: font = h->metal;
1.88 kristaps 257: break;
258: case (DECO_ITALIC):
1.90 ! kristaps 259: font = HTMLFONT_ITALIC;
! 260: break;
! 261: case (DECO_BOLD):
! 262: font = HTMLFONT_BOLD;
1.88 kristaps 263: break;
264: case (DECO_ROMAN):
1.90 ! kristaps 265: font = HTMLFONT_NONE;
1.88 kristaps 266: break;
267: default:
268: abort();
269: /* NOTREACHED */
270: }
271:
1.90 ! kristaps 272: (void)print_ofont(h, font);
1.88 kristaps 273: }
274:
275:
1.85 kristaps 276: static int
1.88 kristaps 277: print_encode(struct html *h, const char *p, int norecurse)
1.29 kristaps 278: {
1.77 kristaps 279: size_t sz;
1.85 kristaps 280: int len, nospace;
1.82 kristaps 281: const char *seq;
282: enum roffdeco deco;
1.14 kristaps 283:
1.85 kristaps 284: nospace = 0;
285:
1.32 kristaps 286: for (; *p; p++) {
1.77 kristaps 287: sz = strcspn(p, "\\<>&");
288:
289: fwrite(p, 1, sz, stdout);
1.80 kristaps 290: p += /* LINTED */
291: sz;
1.77 kristaps 292:
1.82 kristaps 293: if ('<' == *p) {
294: printf("<");
295: continue;
296: } else if ('>' == *p) {
297: printf(">");
298: continue;
299: } else if ('&' == *p) {
300: printf("&");
1.34 kristaps 301: continue;
1.77 kristaps 302: } else if ('\0' == *p)
303: break;
304:
1.82 kristaps 305: seq = ++p;
306: len = a2roffdeco(&deco, &seq, &sz);
307:
308: switch (deco) {
309: case (DECO_RESERVED):
310: print_res(h, seq, sz);
311: break;
312: case (DECO_SPECIAL):
313: print_spec(h, seq, sz);
314: break;
1.90 ! kristaps 315: case (DECO_PREVIOUS):
! 316: /* FALLTHROUGH */
1.88 kristaps 317: case (DECO_BOLD):
318: /* FALLTHROUGH */
319: case (DECO_ITALIC):
320: /* FALLTHROUGH */
321: case (DECO_ROMAN):
322: if (norecurse)
323: break;
324: print_metaf(h, deco);
325: break;
1.82 kristaps 326: default:
327: break;
328: }
329:
330: p += len - 1;
1.84 kristaps 331:
332: if (DECO_NOSPACE == deco && '\0' == *(p + 1))
1.85 kristaps 333: nospace = 1;
1.32 kristaps 334: }
1.85 kristaps 335:
336: return(nospace);
1.14 kristaps 337: }
338:
339:
1.51 kristaps 340: struct tag *
1.29 kristaps 341: print_otag(struct html *h, enum htmltag tag,
342: int sz, const struct htmlpair *p)
1.14 kristaps 343: {
1.29 kristaps 344: int i;
1.30 kristaps 345: struct tag *t;
346:
347: if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
1.72 kristaps 348: t = malloc(sizeof(struct tag));
349: if (NULL == t) {
1.75 kristaps 350: perror(NULL);
1.72 kristaps 351: exit(EXIT_FAILURE);
352: }
1.30 kristaps 353: t->tag = tag;
1.66 kristaps 354: t->next = h->tags.head;
355: h->tags.head = t;
1.30 kristaps 356: } else
357: t = NULL;
1.29 kristaps 358:
359: if ( ! (HTML_NOSPACE & h->flags))
1.30 kristaps 360: if ( ! (HTML_CLRLINE & htmltags[tag].flags))
1.78 kristaps 361: putchar(' ');
1.29 kristaps 362:
363: printf("<%s", htmltags[tag].name);
364: for (i = 0; i < sz; i++) {
365: printf(" %s=\"", htmlattrs[p[i].key]);
366: assert(p->val);
1.88 kristaps 367: (void)print_encode(h, p[i].val, 1);
1.78 kristaps 368: putchar('\"');
1.29 kristaps 369: }
1.78 kristaps 370: putchar('>');
1.14 kristaps 371:
1.29 kristaps 372: h->flags |= HTML_NOSPACE;
1.30 kristaps 373: return(t);
1.14 kristaps 374: }
375:
376:
1.29 kristaps 377: static void
378: print_ctag(struct html *h, enum htmltag tag)
1.14 kristaps 379: {
380:
1.29 kristaps 381: printf("</%s>", htmltags[tag].name);
1.71 kristaps 382: if (HTML_CLRLINE & htmltags[tag].flags) {
1.29 kristaps 383: h->flags |= HTML_NOSPACE;
1.78 kristaps 384: putchar('\n');
1.87 kristaps 385: }
1.14 kristaps 386: }
387:
388:
1.29 kristaps 389: /* ARGSUSED */
1.51 kristaps 390: void
1.29 kristaps 391: print_gen_doctype(struct html *h)
1.1 kristaps 392: {
1.29 kristaps 393:
1.46 kristaps 394: printf("<!DOCTYPE HTML PUBLIC \"%s\" \"%s\">", DOCTYPE, DTD);
1.1 kristaps 395: }
396:
397:
1.51 kristaps 398: void
1.29 kristaps 399: print_text(struct html *h, const char *p)
1.1 kristaps 400: {
401:
1.29 kristaps 402: if (*p && 0 == *(p + 1))
403: switch (*p) {
404: case('.'):
405: /* FALLTHROUGH */
406: case(','):
407: /* FALLTHROUGH */
408: case(';'):
409: /* FALLTHROUGH */
410: case(':'):
411: /* FALLTHROUGH */
412: case('?'):
413: /* FALLTHROUGH */
414: case('!'):
415: /* FALLTHROUGH */
416: case(')'):
417: /* FALLTHROUGH */
418: case(']'):
419: /* FALLTHROUGH */
420: case('}'):
1.52 kristaps 421: if ( ! (HTML_IGNDELIM & h->flags))
422: h->flags |= HTML_NOSPACE;
1.30 kristaps 423: break;
1.29 kristaps 424: default:
425: break;
426: }
1.1 kristaps 427:
1.29 kristaps 428: if ( ! (h->flags & HTML_NOSPACE))
1.78 kristaps 429: putchar(' ');
1.30 kristaps 430:
1.86 kristaps 431: assert(p);
1.88 kristaps 432: if ( ! print_encode(h, p, 0))
1.86 kristaps 433: h->flags &= ~HTML_NOSPACE;
1.8 kristaps 434:
1.29 kristaps 435: if (*p && 0 == *(p + 1))
436: switch (*p) {
437: case('('):
438: /* FALLTHROUGH */
439: case('['):
440: /* FALLTHROUGH */
441: case('{'):
442: h->flags |= HTML_NOSPACE;
1.30 kristaps 443: break;
1.29 kristaps 444: default:
445: break;
446: }
1.1 kristaps 447: }
1.30 kristaps 448:
449:
1.51 kristaps 450: void
1.30 kristaps 451: print_tagq(struct html *h, const struct tag *until)
452: {
453: struct tag *tag;
454:
1.66 kristaps 455: while ((tag = h->tags.head) != NULL) {
1.89 kristaps 456: if (tag == h->metaf)
457: h->metaf = NULL;
1.30 kristaps 458: print_ctag(h, tag->tag);
1.66 kristaps 459: h->tags.head = tag->next;
1.30 kristaps 460: free(tag);
461: if (until && tag == until)
462: return;
463: }
464: }
465:
466:
1.51 kristaps 467: void
1.30 kristaps 468: print_stagq(struct html *h, const struct tag *suntil)
469: {
470: struct tag *tag;
471:
1.66 kristaps 472: while ((tag = h->tags.head) != NULL) {
1.30 kristaps 473: if (suntil && tag == suntil)
474: return;
1.89 kristaps 475: if (tag == h->metaf)
476: h->metaf = NULL;
1.30 kristaps 477: print_ctag(h, tag->tag);
1.66 kristaps 478: h->tags.head = tag->next;
1.30 kristaps 479: free(tag);
480: }
481: }
1.55 kristaps 482:
483:
484: void
485: bufinit(struct html *h)
486: {
487:
488: h->buf[0] = '\0';
489: h->buflen = 0;
490: }
491:
492:
/*
 * Append one CSS declaration, "key:val;", to the scratch buffer.
 */
void
bufcat_style(struct html *h, const char *key, const char *val)
{

	/* Property name and separator. */
	bufcat(h, key);
	bufncat(h, ":", 1);

	/* Property value and terminator. */
	bufcat(h, val);
	bufncat(h, ";", 1);
}
502:
503:
/* Append the whole NUL-terminated string "p" to the scratch buffer. */
void
bufcat(struct html *h, const char *p)
{
	size_t		 n;

	n = strlen(p);
	bufncat(h, p, n);
}
510:
511:
512: void
513: buffmt(struct html *h, const char *fmt, ...)
514: {
515: va_list ap;
516:
517: va_start(ap, fmt);
1.56 kristaps 518: (void)vsnprintf(h->buf + (int)h->buflen,
1.55 kristaps 519: BUFSIZ - h->buflen - 1, fmt, ap);
520: va_end(ap);
521: h->buflen = strlen(h->buf);
522: }
523:
524:
525: void
526: bufncat(struct html *h, const char *p, size_t sz)
527: {
528:
529: if (h->buflen + sz > BUFSIZ - 1)
530: sz = BUFSIZ - 1 - h->buflen;
531:
532: (void)strncat(h->buf, p, sz);
533: h->buflen += sz;
534: }
535:
536:
537: void
538: buffmt_includes(struct html *h, const char *name)
539: {
540: const char *p, *pp;
541:
542: pp = h->base_includes;
1.61 kristaps 543:
544: while (NULL != (p = strchr(pp, '%'))) {
1.56 kristaps 545: bufncat(h, pp, (size_t)(p - pp));
1.55 kristaps 546: switch (*(p + 1)) {
547: case('I'):
548: bufcat(h, name);
549: break;
550: default:
551: bufncat(h, p, 2);
552: break;
553: }
554: pp = p + 2;
555: }
556: if (pp)
557: bufcat(h, pp);
558: }
559:
560:
561: void
562: buffmt_man(struct html *h,
563: const char *name, const char *sec)
564: {
565: const char *p, *pp;
566:
567: pp = h->base_man;
1.61 kristaps 568:
569: /* LINTED */
570: while (NULL != (p = strchr(pp, '%'))) {
1.56 kristaps 571: bufncat(h, pp, (size_t)(p - pp));
1.55 kristaps 572: switch (*(p + 1)) {
573: case('S'):
1.58 kristaps 574: bufcat(h, sec ? sec : "1");
1.55 kristaps 575: break;
576: case('N'):
1.58 kristaps 577: buffmt(h, name);
1.55 kristaps 578: break;
579: default:
580: bufncat(h, p, 2);
581: break;
582: }
583: pp = p + 2;
584: }
585: if (pp)
586: bufcat(h, pp);
587: }
1.58 kristaps 588:
589:
590: void
591: bufcat_su(struct html *h, const char *p, const struct roffsu *su)
592: {
1.62 kristaps 593: double v;
1.63 kristaps 594: const char *u;
1.58 kristaps 595:
596: v = su->scale;
597:
598: switch (su->unit) {
599: case (SCALE_CM):
600: u = "cm";
601: break;
602: case (SCALE_IN):
603: u = "in";
604: break;
605: case (SCALE_PC):
606: u = "pc";
607: break;
608: case (SCALE_PT):
609: u = "pt";
610: break;
1.59 kristaps 611: case (SCALE_EM):
612: u = "em";
613: break;
1.58 kristaps 614: case (SCALE_MM):
615: if (0 == (v /= 100))
616: v = 1;
617: u = "em";
618: break;
1.59 kristaps 619: case (SCALE_EN):
620: u = "ex";
621: break;
622: case (SCALE_BU):
623: u = "ex";
624: break;
1.58 kristaps 625: case (SCALE_VS):
626: u = "em";
627: break;
628: default:
629: u = "ex";
630: break;
631: }
632:
1.62 kristaps 633: if (su->pt)
634: buffmt(h, "%s: %f%s;", p, v, u);
635: else
636: /* LINTED */
637: buffmt(h, "%s: %d%s;", p, (int)v, u);
1.58 kristaps 638: }
1.65 kristaps 639:
1.68 kristaps 640:
/*
 * Append an HTML-safe identifier derived from "src" to the string
 * already held in "dst".
 *
 * "sz" is the total size of the "dst" buffer in bytes, including the
 * text already there and the terminating NUL; there must be room for
 * at least the prefix.  The appended text is the letter 'x' (an ID may
 * not start with a digit) followed by two lower-case hexadecimal
 * digits per byte of "src".  Encoding stops when "src" ends or when
 * the next byte would not fit in full, so the result never ends in
 * half an encoded byte.  "dst" is always NUL-terminated.
 */
void
html_idcat(char *dst, const char *src, int sz)
{
	int		 ssz;

	assert(sz);

	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

	/* Check the remaining size before reading, then jump to end. */
	for ( ; sz && *dst != '\0'; dst++, sz--)
		/* Jump to end. */ ;

	assert(sz > 2);

	/* We can't start with a number (bah). */

	*dst++ = 'x';
	*dst = '\0';
	sz--;

	/* Each byte needs two digits plus room for the NUL. */
	for ( ; *src != '\0' && sz > 2; src++) {
		/*
		 * Go through unsigned char: a negative plain char would
		 * be sign-extended and print as eight digits.
		 */
		ssz = snprintf(dst, (size_t)sz, "%.2x",
		    (unsigned int)(unsigned char)*src);
		if (ssz < 0 || ssz >= sz)
			break;
		sz -= ssz;
		dst += ssz;
	}
}
CVSweb