Annotation of mandoc/html.c, Revision 1.88
1.88 ! kristaps 1: /* $Id: html.c,v 1.87 2009/11/14 12:04:59 kristaps Exp $ */
1.1 kristaps 2: /*
1.29 kristaps 3: * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
1.1 kristaps 4: *
5: * Permission to use, copy, modify, and distribute this software for any
1.29 kristaps 6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
1.1 kristaps 8: *
1.29 kristaps 9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.1 kristaps 16: */
1.41 kristaps 17: #include <sys/types.h>
1.30 kristaps 18:
1.1 kristaps 19: #include <assert.h>
1.68 kristaps 20: #include <ctype.h>
1.76 kristaps 21: #include <stdarg.h>
1.29 kristaps 22: #include <stdio.h>
1.63 kristaps 23: #include <stdint.h>
1.1 kristaps 24: #include <stdlib.h>
1.33 kristaps 25: #include <string.h>
1.45 kristaps 26: #include <unistd.h>
1.1 kristaps 27:
1.58 kristaps 28: #include "out.h"
1.32 kristaps 29: #include "chars.h"
1.51 kristaps 30: #include "html.h"
1.64 kristaps 31: #include "main.h"
1.2 kristaps 32:
/* Drop the const qualifier from a pointer by round-tripping it through uintptr_t. */
1.63 kristaps 33: #define UNCONST(a) ((void *)(uintptr_t)(const void *)(a))
34:
/* Public identifier and DTD URL printed in the <!DOCTYPE> declaration. */
1.29 kristaps 35: #define DOCTYPE "-//W3C//DTD HTML 4.01//EN"
36: #define DTD "http://www.w3.org/TR/html4/strict.dtd"
1.8 kristaps 37:
/* Static description of one HTML element: its printed name and output flags. */
1.29 kristaps 38: struct htmldata {
1.63 kristaps 39: const char *name; /* element name as printed inside <...> */
1.29 kristaps 40: int flags; /* bitwise OR of the HTML_* values below */
1.30 kristaps 41: #define HTML_CLRLINE (1 << 0) /* no leading space on open; newline after close */
42: #define HTML_NOSTACK (1 << 1) /* opening the tag does not push it on the tag stack */
1.29 kristaps 43: };
1.7 kristaps 44:
/* Element table; looked up as htmltags[tag] with an enum htmltag index, so row order matters. */
1.29 kristaps 45: static const struct htmldata htmltags[TAG_MAX] = {
1.30 kristaps 46: {"html", HTML_CLRLINE}, /* TAG_HTML */
47: {"head", HTML_CLRLINE}, /* TAG_HEAD */
48: {"body", HTML_CLRLINE}, /* TAG_BODY */
49: {"meta", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_META */
1.33 kristaps 50: {"title", HTML_CLRLINE}, /* TAG_TITLE */
1.30 kristaps 51: {"div", HTML_CLRLINE}, /* TAG_DIV */
1.29 kristaps 52: {"h1", 0}, /* TAG_H1 */
53: {"h2", 0}, /* TAG_H2 */
1.30 kristaps 54: {"p", HTML_CLRLINE}, /* TAG_P */
1.29 kristaps 55: {"span", 0}, /* TAG_SPAN */
1.30 kristaps 56: {"link", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
57: {"br", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_BR */
58: {"a", 0}, /* TAG_A */
1.33 kristaps 59: {"table", HTML_CLRLINE}, /* TAG_TABLE */
60: {"col", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_COL */
61: {"tr", HTML_CLRLINE}, /* TAG_TR */
62: {"td", HTML_CLRLINE}, /* TAG_TD */
1.34 kristaps 63: {"li", HTML_CLRLINE}, /* TAG_LI */
64: {"ul", HTML_CLRLINE}, /* TAG_UL */
65: {"ol", HTML_CLRLINE}, /* TAG_OL */
1.41 kristaps 66: {"base", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_BASE */
1.29 kristaps 67: };
1.10 kristaps 68:
/* Attribute names; looked up as htmlattrs[pair.key], so order must follow the key enumeration. */
1.82 kristaps 69: static const char *const htmlattrs[ATTR_MAX] = {
1.29 kristaps 70: "http-equiv",
71: "content",
72: "name",
73: "rel",
74: "href",
75: "type",
76: "media",
1.33 kristaps 77: "class",
78: "style",
79: "width",
80: "valign",
1.54 kristaps 81: "target",
1.57 kristaps 82: "id",
1.67 kristaps 83: "summary",
1.29 kristaps 84: };
1.10 kristaps 85:
/* NOTE(review): declared by hand on Linux, presumably because the headers included here do not expose it - confirm. */
1.33 kristaps 86: #ifdef __linux__
1.43 kristaps 87: extern int getsubopt(char **, char * const *, char **);
1.33 kristaps 88: #endif
1.29 kristaps 89:
1.82 kristaps 90:
/* File-local helpers; see the definitions below. */
1.83 kristaps 91: static void print_spec(struct html *, const char *, size_t);
92: static void print_res(struct html *, const char *, size_t);
1.82 kristaps 93: static void print_ctag(struct html *, enum htmltag);
1.88 ! kristaps 94: static int print_encode(struct html *, const char *, int);
! 95: static void print_metaf(struct html *, enum roffdeco);
1.82 kristaps 96:
97:
1.29 kristaps 98: void *
1.43 kristaps 99: html_alloc(char *outopts)
1.10 kristaps 100: {
1.30 kristaps 101: struct html *h;
1.63 kristaps 102: const char *toks[4];
103: char *v;
1.43 kristaps 104:
105: toks[0] = "style";
1.53 kristaps 106: toks[1] = "man";
1.54 kristaps 107: toks[2] = "includes";
108: toks[3] = NULL;
1.30 kristaps 109:
1.72 kristaps 110: h = calloc(1, sizeof(struct html));
111: if (NULL == h) {
1.75 kristaps 112: perror(NULL);
1.72 kristaps 113: exit(EXIT_FAILURE);
114: }
1.10 kristaps 115:
1.66 kristaps 116: h->tags.head = NULL;
117: h->ords.head = NULL;
1.72 kristaps 118: h->symtab = chars_init(CHARS_HTML);
1.41 kristaps 119:
1.47 kristaps 120: while (outopts && *outopts)
1.63 kristaps 121: switch (getsubopt(&outopts, UNCONST(toks), &v)) {
1.43 kristaps 122: case (0):
123: h->style = v;
124: break;
125: case (1):
1.53 kristaps 126: h->base_man = v;
1.43 kristaps 127: break;
1.54 kristaps 128: case (2):
129: h->base_includes = v;
130: break;
1.43 kristaps 131: default:
132: break;
133: }
134:
1.30 kristaps 135: return(h);
1.29 kristaps 136: }
1.10 kristaps 137:
1.33 kristaps 138:
1.29 kristaps 139: void
140: html_free(void *p)
141: {
1.30 kristaps 142: struct tag *tag;
1.37 kristaps 143: struct ord *ord;
1.30 kristaps 144: struct html *h;
145:
146: h = (struct html *)p;
1.10 kristaps 147:
1.66 kristaps 148: while ((ord = h->ords.head) != NULL) {
149: h->ords.head = ord->next;
1.37 kristaps 150: free(ord);
151: }
152:
1.66 kristaps 153: while ((tag = h->tags.head) != NULL) {
154: h->tags.head = tag->next;
1.30 kristaps 155: free(tag);
156: }
1.36 kristaps 157:
158: if (h->symtab)
159: chars_free(h->symtab);
1.53 kristaps 160:
1.30 kristaps 161: free(h);
1.10 kristaps 162: }
1.2 kristaps 163:
1.33 kristaps 164:
1.51 kristaps 165: void
1.29 kristaps 166: print_gen_head(struct html *h)
167: {
1.41 kristaps 168: struct htmlpair tag[4];
169:
170: tag[0].key = ATTR_HTTPEQUIV;
171: tag[0].val = "Content-Type";
172: tag[1].key = ATTR_CONTENT;
173: tag[1].val = "text/html; charset=utf-8";
174: print_otag(h, TAG_META, 2, tag);
175:
176: tag[0].key = ATTR_NAME;
177: tag[0].val = "resource-type";
178: tag[1].key = ATTR_CONTENT;
179: tag[1].val = "document";
180: print_otag(h, TAG_META, 2, tag);
181:
182: if (h->style) {
183: tag[0].key = ATTR_REL;
184: tag[0].val = "stylesheet";
185: tag[1].key = ATTR_HREF;
186: tag[1].val = h->style;
187: tag[2].key = ATTR_TYPE;
188: tag[2].val = "text/css";
189: tag[3].key = ATTR_MEDIA;
190: tag[3].val = "all";
191: print_otag(h, TAG_LINK, 4, tag);
192: }
1.4 kristaps 193: }
194:
1.33 kristaps 195:
1.29 kristaps 196: static void
1.83 kristaps 197: print_spec(struct html *h, const char *p, size_t len)
1.32 kristaps 198: {
199: const char *rhs;
200: size_t sz;
201:
1.83 kristaps 202: rhs = chars_a2ascii(h->symtab, p, len, &sz);
1.32 kristaps 203:
204: if (NULL == rhs)
205: return;
1.76 kristaps 206: fwrite(rhs, 1, sz, stdout);
1.32 kristaps 207: }
208:
1.33 kristaps 209:
1.32 kristaps 210: static void
1.83 kristaps 211: print_res(struct html *h, const char *p, size_t len)
1.32 kristaps 212: {
213: const char *rhs;
214: size_t sz;
215:
1.83 kristaps 216: rhs = chars_a2res(h->symtab, p, len, &sz);
1.32 kristaps 217:
218: if (NULL == rhs)
219: return;
1.76 kristaps 220: fwrite(rhs, 1, sz, stdout);
1.32 kristaps 221: }
222:
1.33 kristaps 223:
1.88 ! kristaps 224: static void
! 225: print_metaf(struct html *h, enum roffdeco deco)
! 226: {
! 227: const char *class;
! 228: struct htmlpair tag;
! 229:
! 230: switch (deco) {
! 231: case (DECO_BOLD):
! 232: class = "bold";
! 233: break;
! 234: case (DECO_ITALIC):
! 235: class = "italic";
! 236: break;
! 237: case (DECO_ROMAN):
! 238: class = "roman";
! 239: break;
! 240: default:
! 241: abort();
! 242: /* NOTREACHED */
! 243: }
! 244:
! 245: if (h->metaf) {
! 246: assert(h->tags.head);
! 247: assert(h->metaf == h->tags.head);
! 248: print_tagq(h, h->metaf);
! 249: }
! 250:
! 251: PAIR_CLASS_INIT(&tag, class);
! 252: h->metaf = print_otag(h, TAG_SPAN, 1, &tag);
! 253: }
! 254:
! 255:
1.85 kristaps 256: static int
1.88 ! kristaps 257: print_encode(struct html *h, const char *p, int norecurse)
1.29 kristaps 258: {
1.77 kristaps 259: size_t sz;
1.85 kristaps 260: int len, nospace;
1.82 kristaps 261: const char *seq;
262: enum roffdeco deco;
1.14 kristaps 263:
1.85 kristaps 264: nospace = 0;
265:
1.32 kristaps 266: for (; *p; p++) {
1.77 kristaps 267: sz = strcspn(p, "\\<>&");
268:
269: fwrite(p, 1, sz, stdout);
1.80 kristaps 270: p += /* LINTED */
271: sz;
1.77 kristaps 272:
1.82 kristaps 273: if ('<' == *p) {
274: printf("<");
275: continue;
276: } else if ('>' == *p) {
277: printf(">");
278: continue;
279: } else if ('&' == *p) {
280: printf("&");
1.34 kristaps 281: continue;
1.77 kristaps 282: } else if ('\0' == *p)
283: break;
284:
1.82 kristaps 285: seq = ++p;
286: len = a2roffdeco(&deco, &seq, &sz);
287:
288: switch (deco) {
289: case (DECO_RESERVED):
290: print_res(h, seq, sz);
291: break;
292: case (DECO_SPECIAL):
293: print_spec(h, seq, sz);
294: break;
1.88 ! kristaps 295: case (DECO_BOLD):
! 296: /* FALLTHROUGH */
! 297: case (DECO_ITALIC):
! 298: /* FALLTHROUGH */
! 299: case (DECO_ROMAN):
! 300: if (norecurse)
! 301: break;
! 302: print_metaf(h, deco);
! 303: break;
1.82 kristaps 304: default:
305: break;
306: }
307:
308: p += len - 1;
1.84 kristaps 309:
310: if (DECO_NOSPACE == deco && '\0' == *(p + 1))
1.85 kristaps 311: nospace = 1;
1.32 kristaps 312: }
1.85 kristaps 313:
314: return(nospace);
1.14 kristaps 315: }
316:
317:
1.51 kristaps 318: struct tag *
1.29 kristaps 319: print_otag(struct html *h, enum htmltag tag,
320: int sz, const struct htmlpair *p)
1.14 kristaps 321: {
1.29 kristaps 322: int i;
1.30 kristaps 323: struct tag *t;
324:
325: if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
1.72 kristaps 326: t = malloc(sizeof(struct tag));
327: if (NULL == t) {
1.75 kristaps 328: perror(NULL);
1.72 kristaps 329: exit(EXIT_FAILURE);
330: }
1.30 kristaps 331: t->tag = tag;
1.66 kristaps 332: t->next = h->tags.head;
333: h->tags.head = t;
1.30 kristaps 334: } else
335: t = NULL;
1.29 kristaps 336:
337: if ( ! (HTML_NOSPACE & h->flags))
1.30 kristaps 338: if ( ! (HTML_CLRLINE & htmltags[tag].flags))
1.78 kristaps 339: putchar(' ');
1.29 kristaps 340:
341: printf("<%s", htmltags[tag].name);
342: for (i = 0; i < sz; i++) {
343: printf(" %s=\"", htmlattrs[p[i].key]);
344: assert(p->val);
1.88 ! kristaps 345: (void)print_encode(h, p[i].val, 1);
1.78 kristaps 346: putchar('\"');
1.29 kristaps 347: }
1.78 kristaps 348: putchar('>');
1.14 kristaps 349:
1.29 kristaps 350: h->flags |= HTML_NOSPACE;
1.30 kristaps 351: return(t);
1.14 kristaps 352: }
353:
354:
355: /* ARGSUSED */
1.29 kristaps 356: static void
357: print_ctag(struct html *h, enum htmltag tag)
1.14 kristaps 358: {
359:
1.29 kristaps 360: printf("</%s>", htmltags[tag].name);
1.71 kristaps 361: if (HTML_CLRLINE & htmltags[tag].flags) {
1.29 kristaps 362: h->flags |= HTML_NOSPACE;
1.78 kristaps 363: putchar('\n');
1.87 kristaps 364: }
1.14 kristaps 365: }
366:
367:
1.29 kristaps 368: /* ARGSUSED */
1.51 kristaps 369: void
1.29 kristaps 370: print_gen_doctype(struct html *h)
1.1 kristaps 371: {
1.29 kristaps 372:
1.46 kristaps 373: printf("<!DOCTYPE HTML PUBLIC \"%s\" \"%s\">", DOCTYPE, DTD);
1.1 kristaps 374: }
375:
376:
1.51 kristaps 377: void
1.29 kristaps 378: print_text(struct html *h, const char *p)
1.1 kristaps 379: {
380:
1.29 kristaps 381: if (*p && 0 == *(p + 1))
382: switch (*p) {
383: case('.'):
384: /* FALLTHROUGH */
385: case(','):
386: /* FALLTHROUGH */
387: case(';'):
388: /* FALLTHROUGH */
389: case(':'):
390: /* FALLTHROUGH */
391: case('?'):
392: /* FALLTHROUGH */
393: case('!'):
394: /* FALLTHROUGH */
395: case(')'):
396: /* FALLTHROUGH */
397: case(']'):
398: /* FALLTHROUGH */
399: case('}'):
1.52 kristaps 400: if ( ! (HTML_IGNDELIM & h->flags))
401: h->flags |= HTML_NOSPACE;
1.30 kristaps 402: break;
1.29 kristaps 403: default:
404: break;
405: }
1.1 kristaps 406:
1.29 kristaps 407: if ( ! (h->flags & HTML_NOSPACE))
1.78 kristaps 408: putchar(' ');
1.30 kristaps 409:
1.86 kristaps 410: assert(p);
1.88 ! kristaps 411: if ( ! print_encode(h, p, 0))
1.86 kristaps 412: h->flags &= ~HTML_NOSPACE;
1.8 kristaps 413:
1.29 kristaps 414: if (*p && 0 == *(p + 1))
415: switch (*p) {
416: case('('):
417: /* FALLTHROUGH */
418: case('['):
419: /* FALLTHROUGH */
420: case('{'):
421: h->flags |= HTML_NOSPACE;
1.30 kristaps 422: break;
1.29 kristaps 423: default:
424: break;
425: }
1.1 kristaps 426: }
1.30 kristaps 427:
428:
1.51 kristaps 429: void
1.30 kristaps 430: print_tagq(struct html *h, const struct tag *until)
431: {
432: struct tag *tag;
433:
1.66 kristaps 434: while ((tag = h->tags.head) != NULL) {
1.30 kristaps 435: print_ctag(h, tag->tag);
1.66 kristaps 436: h->tags.head = tag->next;
1.30 kristaps 437: free(tag);
438: if (until && tag == until)
439: return;
440: }
441: }
442:
443:
1.51 kristaps 444: void
1.30 kristaps 445: print_stagq(struct html *h, const struct tag *suntil)
446: {
447: struct tag *tag;
448:
1.66 kristaps 449: while ((tag = h->tags.head) != NULL) {
1.30 kristaps 450: if (suntil && tag == suntil)
451: return;
452: print_ctag(h, tag->tag);
1.66 kristaps 453: h->tags.head = tag->next;
1.30 kristaps 454: free(tag);
455: }
456: }
1.55 kristaps 457:
458:
459: void
460: bufinit(struct html *h)
461: {
462:
463: h->buf[0] = '\0';
464: h->buflen = 0;
465: }
466:
467:
/*
 * Append "key:val;" to the scratch buffer.
 */
void
bufcat_style(struct html *h, const char *key, const char *val)
{

	bufncat(h, key, strlen(key));
	bufncat(h, ":", 1);
	bufncat(h, val, strlen(val));
	bufncat(h, ";", 1);
}
477:
478:
/*
 * Append the NUL-terminated string p to the scratch buffer.
 */
void
bufcat(struct html *h, const char *p)
{
	size_t		 len;

	len = strlen(p);
	bufncat(h, p, len);
}
485:
486:
487: void
488: buffmt(struct html *h, const char *fmt, ...)
489: {
490: va_list ap;
491:
492: va_start(ap, fmt);
1.56 kristaps 493: (void)vsnprintf(h->buf + (int)h->buflen,
1.55 kristaps 494: BUFSIZ - h->buflen - 1, fmt, ap);
495: va_end(ap);
496: h->buflen = strlen(h->buf);
497: }
498:
499:
500: void
501: bufncat(struct html *h, const char *p, size_t sz)
502: {
503:
504: if (h->buflen + sz > BUFSIZ - 1)
505: sz = BUFSIZ - 1 - h->buflen;
506:
507: (void)strncat(h->buf, p, sz);
508: h->buflen += sz;
509: }
510:
511:
512: void
513: buffmt_includes(struct html *h, const char *name)
514: {
515: const char *p, *pp;
516:
517: pp = h->base_includes;
1.61 kristaps 518:
519: while (NULL != (p = strchr(pp, '%'))) {
1.56 kristaps 520: bufncat(h, pp, (size_t)(p - pp));
1.55 kristaps 521: switch (*(p + 1)) {
522: case('I'):
523: bufcat(h, name);
524: break;
525: default:
526: bufncat(h, p, 2);
527: break;
528: }
529: pp = p + 2;
530: }
531: if (pp)
532: bufcat(h, pp);
533: }
534:
535:
536: void
537: buffmt_man(struct html *h,
538: const char *name, const char *sec)
539: {
540: const char *p, *pp;
541:
542: pp = h->base_man;
1.61 kristaps 543:
544: /* LINTED */
545: while (NULL != (p = strchr(pp, '%'))) {
1.56 kristaps 546: bufncat(h, pp, (size_t)(p - pp));
1.55 kristaps 547: switch (*(p + 1)) {
548: case('S'):
1.58 kristaps 549: bufcat(h, sec ? sec : "1");
1.55 kristaps 550: break;
551: case('N'):
1.58 kristaps 552: buffmt(h, name);
1.55 kristaps 553: break;
554: default:
555: bufncat(h, p, 2);
556: break;
557: }
558: pp = p + 2;
559: }
560: if (pp)
561: bufcat(h, pp);
562: }
1.58 kristaps 563:
564:
565: void
566: bufcat_su(struct html *h, const char *p, const struct roffsu *su)
567: {
1.62 kristaps 568: double v;
1.63 kristaps 569: const char *u;
1.58 kristaps 570:
571: v = su->scale;
572:
573: switch (su->unit) {
574: case (SCALE_CM):
575: u = "cm";
576: break;
577: case (SCALE_IN):
578: u = "in";
579: break;
580: case (SCALE_PC):
581: u = "pc";
582: break;
583: case (SCALE_PT):
584: u = "pt";
585: break;
1.59 kristaps 586: case (SCALE_EM):
587: u = "em";
588: break;
1.58 kristaps 589: case (SCALE_MM):
590: if (0 == (v /= 100))
591: v = 1;
592: u = "em";
593: break;
1.59 kristaps 594: case (SCALE_EN):
595: u = "ex";
596: break;
597: case (SCALE_BU):
598: u = "ex";
599: break;
1.58 kristaps 600: case (SCALE_VS):
601: u = "em";
602: break;
603: default:
604: u = "ex";
605: break;
606: }
607:
1.62 kristaps 608: if (su->pt)
609: buffmt(h, "%s: %f%s;", p, v, u);
610: else
611: /* LINTED */
612: buffmt(h, "%s: %d%s;", p, (int)v, u);
1.58 kristaps 613: }
1.65 kristaps 614:
1.68 kristaps 615:
/*
 * Append an HTML-id-safe encoding of src to the string in dst.
 *
 * dst is a NUL-terminated buffer of sz bytes in total.  The encoding
 * is the letter 'x' followed by two lowercase hex digits per byte of
 * src.  At least three bytes must be free in dst (asserted).  When
 * src does not fit, the result is cut at a whole-byte boundary and
 * remains NUL-terminated.
 */
void
html_idcat(char *dst, const char *src, int sz)
{
	int		 ssz;

	assert(sz);

	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

	/* Test the budget first so dst[sz] is never read. */
	for ( ; sz && *dst != '\0'; dst++, sz--)
		/* Jump to end. */ ;

	assert(sz > 2);

	/* We can't start with a number (bah). */

	*dst++ = 'x';
	*dst = '\0';
	sz--;

	/*
	 * Need room for two digits and the terminator.  Go through
	 * unsigned char so bytes >= 0x80 are not sign-extended into
	 * eight hex digits.
	 */
	for ( ; *src != '\0' && sz > 2; src++) {
		ssz = snprintf(dst, (size_t)sz, "%.2x",
		    (unsigned int)(unsigned char)*src);
		sz -= ssz;
		dst += ssz;
	}
}
CVSweb