Annotation of mandoc/html.c, Revision 1.262
1.262 ! schwarze 1: /* $Id: html.c,v 1.261 2019/09/05 13:35:04 schwarze Exp $ */
1.1 kristaps 2: /*
1.176 schwarze 3: * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
1.249 schwarze 4: * Copyright (c) 2011-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
1.1 kristaps 5: *
6: * Permission to use, copy, modify, and distribute this software for any
1.29 kristaps 7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
1.1 kristaps 9: *
1.186 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.29 kristaps 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.186 schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.29 kristaps 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.1 kristaps 17: */
1.92 kristaps 18: #include "config.h"
19:
1.41 kristaps 20: #include <sys/types.h>
1.240 schwarze 21: #include <sys/stat.h>
1.30 kristaps 22:
1.1 kristaps 23: #include <assert.h>
1.68 kristaps 24: #include <ctype.h>
1.76 kristaps 25: #include <stdarg.h>
1.229 schwarze 26: #include <stddef.h>
1.29 kristaps 27: #include <stdio.h>
1.63 kristaps 28: #include <stdint.h>
1.1 kristaps 29: #include <stdlib.h>
1.33 kristaps 30: #include <string.h>
1.45 kristaps 31: #include <unistd.h>
1.1 kristaps 32:
1.210 schwarze 33: #include "mandoc_aux.h"
1.229 schwarze 34: #include "mandoc_ohash.h"
1.100 kristaps 35: #include "mandoc.h"
1.210 schwarze 36: #include "roff.h"
1.58 kristaps 37: #include "out.h"
1.51 kristaps 38: #include "html.h"
1.186 schwarze 39: #include "manconf.h"
1.64 kristaps 40: #include "main.h"
1.63 kristaps 41:
/*
 * Per-element formatting and nesting properties.
 * These bits are stored in the flags member of struct htmldata.
 */
#define	HTML_INPHRASE	 (1 << 0)  /* Can appear in phrasing context. */
#define	HTML_TOPHRASE	 (1 << 1)  /* Establishes phrasing context. */
#define	HTML_NOSTACK	 (1 << 2)  /* Does not have an end tag. */
#define	HTML_NLBEFORE	 (1 << 3)  /* Output line break before opening. */
#define	HTML_NLBEGIN	 (1 << 4)  /* Output line break after opening. */
#define	HTML_NLEND	 (1 << 5)  /* Output line break before closing. */
#define	HTML_NLAFTER	 (1 << 6)  /* Output line break after closing. */
#define	HTML_INDENT	 (1 << 7)  /* Indent content by two spaces. */
#define	HTML_NOINDENT	 (1 << 8)  /* Exception: never indent content. */

/* Convenience combinations of the line break bits. */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)

/* Static description of one HTML element type. */
struct	htmldata {
	const char	 *name;	 /* Element name written into the tags. */
	int		  flags; /* Bitwise OR of the bits defined above. */
};
1.7 kristaps 58:
1.29 kristaps 59: static const struct htmldata htmltags[TAG_MAX] = {
1.196 schwarze 60: {"html", HTML_NLALL},
61: {"head", HTML_NLALL | HTML_INDENT},
1.257 schwarze 62: {"meta", HTML_NOSTACK | HTML_NLALL},
63: {"link", HTML_NOSTACK | HTML_NLALL},
64: {"style", HTML_NLALL | HTML_INDENT},
65: {"title", HTML_NLAROUND},
1.196 schwarze 66: {"body", HTML_NLALL},
67: {"div", HTML_NLAROUND},
1.253 schwarze 68: {"section", HTML_NLALL},
1.196 schwarze 69: {"table", HTML_NLALL | HTML_INDENT},
70: {"tr", HTML_NLALL | HTML_INDENT},
71: {"td", HTML_NLAROUND},
72: {"li", HTML_NLAROUND | HTML_INDENT},
73: {"ul", HTML_NLALL | HTML_INDENT},
74: {"ol", HTML_NLALL | HTML_INDENT},
75: {"dl", HTML_NLALL | HTML_INDENT},
76: {"dt", HTML_NLAROUND},
77: {"dd", HTML_NLAROUND | HTML_INDENT},
1.257 schwarze 78: {"h1", HTML_TOPHRASE | HTML_NLAROUND},
79: {"h2", HTML_TOPHRASE | HTML_NLAROUND},
80: {"p", HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT},
81: {"pre", HTML_TOPHRASE | HTML_NLALL | HTML_NOINDENT},
82: {"a", HTML_INPHRASE | HTML_TOPHRASE},
83: {"b", HTML_INPHRASE | HTML_TOPHRASE},
84: {"cite", HTML_INPHRASE | HTML_TOPHRASE},
85: {"code", HTML_INPHRASE | HTML_TOPHRASE},
86: {"i", HTML_INPHRASE | HTML_TOPHRASE},
87: {"small", HTML_INPHRASE | HTML_TOPHRASE},
88: {"span", HTML_INPHRASE | HTML_TOPHRASE},
89: {"var", HTML_INPHRASE | HTML_TOPHRASE},
90: {"br", HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL},
1.262 ! schwarze 91: {"mark", HTML_INPHRASE | HTML_NOSTACK },
1.257 schwarze 92: {"math", HTML_INPHRASE | HTML_NLALL | HTML_INDENT},
1.196 schwarze 93: {"mrow", 0},
94: {"mi", 0},
1.215 schwarze 95: {"mn", 0},
1.196 schwarze 96: {"mo", 0},
97: {"msup", 0},
98: {"msub", 0},
99: {"msubsup", 0},
100: {"mfrac", 0},
101: {"msqrt", 0},
102: {"mfenced", 0},
103: {"mtable", 0},
104: {"mtr", 0},
105: {"mtd", 0},
106: {"munderover", 0},
107: {"munder", 0},
108: {"mover", 0},
1.90 kristaps 109: };
110:
1.229 schwarze 111: /* Avoid duplicate HTML id= attributes. */
112: static struct ohash id_unique;
113:
1.254 schwarze 114: static void html_reset_internal(struct html *);
1.197 schwarze 115: static void print_byte(struct html *, char);
116: static void print_endword(struct html *);
117: static void print_indent(struct html *);
118: static void print_word(struct html *, const char *);
119:
1.184 schwarze 120: static void print_ctag(struct html *, struct tag *);
1.197 schwarze 121: static int print_escape(struct html *, char);
1.195 schwarze 122: static int print_encode(struct html *, const char *, const char *, int);
123: static void print_href(struct html *, const char *, const char *, int);
1.255 schwarze 124: static void print_metaf(struct html *);
1.82 kristaps 125:
1.156 schwarze 126:
1.180 schwarze 127: void *
1.191 schwarze 128: html_alloc(const struct manoutput *outopts)
1.10 kristaps 129: {
1.30 kristaps 130: struct html *h;
131:
1.128 kristaps 132: h = mandoc_calloc(1, sizeof(struct html));
1.10 kristaps 133:
1.204 schwarze 134: h->tag = NULL;
1.186 schwarze 135: h->style = outopts->style;
1.240 schwarze 136: if ((h->base_man1 = outopts->man) == NULL)
137: h->base_man2 = NULL;
138: else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
139: *h->base_man2++ = '\0';
1.186 schwarze 140: h->base_includes = outopts->includes;
141: if (outopts->fragment)
142: h->oflags |= HTML_FRAGMENT;
1.241 schwarze 143: if (outopts->toc)
144: h->oflags |= HTML_TOC;
1.43 kristaps 145:
1.229 schwarze 146: mandoc_ohash_init(&id_unique, 4, 0);
147:
1.188 schwarze 148: return h;
1.29 kristaps 149: }
1.10 kristaps 150:
1.254 schwarze 151: static void
152: html_reset_internal(struct html *h)
1.29 kristaps 153: {
1.30 kristaps 154: struct tag *tag;
1.229 schwarze 155: char *cp;
156: unsigned int slot;
1.30 kristaps 157:
1.204 schwarze 158: while ((tag = h->tag) != NULL) {
159: h->tag = tag->next;
1.30 kristaps 160: free(tag);
161: }
1.229 schwarze 162: cp = ohash_first(&id_unique, &slot);
163: while (cp != NULL) {
164: free(cp);
165: cp = ohash_next(&id_unique, &slot);
166: }
167: ohash_delete(&id_unique);
1.254 schwarze 168: }
169:
170: void
171: html_reset(void *p)
172: {
173: html_reset_internal(p);
174: mandoc_ohash_init(&id_unique, 4, 0);
175: }
176:
/*
 * Discard the per-document state of the formatter p
 * and free the formatter itself.
 */
void
html_free(void *p)
{
	struct html	*h;

	h = p;
	html_reset_internal(h);
	free(h);
}
1.2 kristaps 183:
1.51 kristaps 184: void
1.29 kristaps 185: print_gen_head(struct html *h)
186: {
1.165 kristaps 187: struct tag *t;
1.41 kristaps 188:
1.194 schwarze 189: print_otag(h, TAG_META, "?", "charset", "utf-8");
1.222 schwarze 190: if (h->style != NULL) {
191: print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
192: h->style, "type", "text/css", "media", "all");
193: return;
194: }
1.165 kristaps 195:
1.168 kristaps 196: /*
1.222 schwarze 197: * Print a minimal embedded style sheet.
1.168 kristaps 198: */
1.196 schwarze 199:
1.194 schwarze 200: t = print_otag(h, TAG_STYLE, "");
1.196 schwarze 201: print_text(h, "table.head, table.foot { width: 100%; }");
1.197 schwarze 202: print_endline(h);
1.196 schwarze 203: print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
1.197 schwarze 204: print_endline(h);
1.196 schwarze 205: print_text(h, "td.head-vol { text-align: center; }");
1.197 schwarze 206: print_endline(h);
1.256 schwarze 207: print_text(h, ".Nd, .Bf, .Op { display: inline; }");
1.225 schwarze 208: print_endline(h);
1.256 schwarze 209: print_text(h, ".Pa, .Ad { font-style: italic; }");
1.226 schwarze 210: print_endline(h);
1.256 schwarze 211: print_text(h, ".Ms { font-weight: bold; }");
1.228 schwarze 212: print_endline(h);
1.256 schwarze 213: print_text(h, ".Bl-diag ");
1.224 schwarze 214: print_byte(h, '>');
215: print_text(h, " dt { font-weight: bold; }");
1.223 schwarze 216: print_endline(h);
1.256 schwarze 217: print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd "
218: "{ font-weight: bold; font-family: inherit; }");
1.165 kristaps 219: print_tagq(h, t);
1.4 kristaps 220: }
221:
1.255 schwarze 222: int
223: html_setfont(struct html *h, enum mandoc_esc font)
1.88 kristaps 224: {
1.255 schwarze 225: switch (font) {
1.156 schwarze 226: case ESCAPE_FONTPREV:
1.90 kristaps 227: font = h->metal;
1.88 kristaps 228: break;
1.156 schwarze 229: case ESCAPE_FONTITALIC:
230: case ESCAPE_FONTBOLD:
231: case ESCAPE_FONTBI:
1.242 schwarze 232: case ESCAPE_FONTCW:
1.255 schwarze 233: case ESCAPE_FONTROMAN:
1.242 schwarze 234: break;
1.156 schwarze 235: case ESCAPE_FONT:
1.255 schwarze 236: font = ESCAPE_FONTROMAN;
1.88 kristaps 237: break;
238: default:
1.255 schwarze 239: return 0;
1.88 kristaps 240: }
1.255 schwarze 241: h->metal = h->metac;
242: h->metac = font;
243: return 1;
244: }
1.88 kristaps 245:
1.255 schwarze 246: static void
247: print_metaf(struct html *h)
248: {
1.122 kristaps 249: if (h->metaf) {
250: print_tagq(h, h->metaf);
251: h->metaf = NULL;
252: }
1.255 schwarze 253: switch (h->metac) {
254: case ESCAPE_FONTITALIC:
1.194 schwarze 255: h->metaf = print_otag(h, TAG_I, "");
1.152 schwarze 256: break;
1.255 schwarze 257: case ESCAPE_FONTBOLD:
1.194 schwarze 258: h->metaf = print_otag(h, TAG_B, "");
1.152 schwarze 259: break;
1.255 schwarze 260: case ESCAPE_FONTBI:
1.194 schwarze 261: h->metaf = print_otag(h, TAG_B, "");
262: print_otag(h, TAG_I, "");
1.152 schwarze 263: break;
1.255 schwarze 264: case ESCAPE_FONTCW:
1.242 schwarze 265: h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
266: break;
1.152 schwarze 267: default:
268: break;
269: }
1.248 schwarze 270: }
271:
1.249 schwarze 272: void
273: html_close_paragraph(struct html *h)
274: {
1.259 schwarze 275: struct tag *this, *next;
276: int flags;
1.249 schwarze 277:
1.259 schwarze 278: this = h->tag;
279: for (;;) {
280: next = this->next;
281: flags = htmltags[this->tag].flags;
282: if (flags & (HTML_INPHRASE | HTML_TOPHRASE))
283: print_ctag(h, this);
284: if ((flags & HTML_INPHRASE) == 0)
1.249 schwarze 285: break;
1.259 schwarze 286: this = next;
1.249 schwarze 287: }
288: }
289:
1.248 schwarze 290: /*
291: * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
292: * TOKEN_NONE does not switch. The old mode is returned.
293: */
294: enum roff_tok
295: html_fillmode(struct html *h, enum roff_tok want)
296: {
297: struct tag *t;
298: enum roff_tok had;
299:
300: for (t = h->tag; t != NULL; t = t->next)
301: if (t->tag == TAG_PRE)
302: break;
303:
304: had = t == NULL ? ROFF_fi : ROFF_nf;
305:
306: if (want != had) {
307: switch (want) {
308: case ROFF_fi:
309: print_tagq(h, t);
310: break;
311: case ROFF_nf:
1.249 schwarze 312: html_close_paragraph(h);
1.248 schwarze 313: print_otag(h, TAG_PRE, "");
314: break;
315: case TOKEN_NONE:
316: break;
317: default:
318: abort();
319: }
320: }
321: return had;
1.210 schwarze 322: }
323:
324: char *
1.229 schwarze 325: html_make_id(const struct roff_node *n, int unique)
1.210 schwarze 326: {
327: const struct roff_node *nch;
1.229 schwarze 328: char *buf, *bufs, *cp;
329: unsigned int slot;
330: int suffix;
1.210 schwarze 331:
332: for (nch = n->child; nch != NULL; nch = nch->next)
333: if (nch->type != ROFFT_TEXT)
334: return NULL;
335:
336: buf = NULL;
337: deroff(&buf, n);
1.220 schwarze 338: if (buf == NULL)
339: return NULL;
1.210 schwarze 340:
1.230 schwarze 341: /*
342: * In ID attributes, only use ASCII characters that are
343: * permitted in URL-fragment strings according to the
344: * explicit list at:
345: * https://url.spec.whatwg.org/#url-fragment-string
346: */
1.210 schwarze 347:
348: for (cp = buf; *cp != '\0'; cp++)
1.230 schwarze 349: if (isalnum((unsigned char)*cp) == 0 &&
350: strchr("!$&'()*+,-./:;=?@_~", *cp) == NULL)
1.210 schwarze 351: *cp = '_';
352:
1.229 schwarze 353: if (unique == 0)
354: return buf;
355:
356: /* Avoid duplicate HTML id= attributes. */
357:
358: bufs = NULL;
359: suffix = 1;
360: slot = ohash_qlookup(&id_unique, buf);
361: cp = ohash_find(&id_unique, slot);
362: if (cp != NULL) {
363: while (cp != NULL) {
364: free(bufs);
365: if (++suffix > 127) {
366: free(buf);
367: return NULL;
368: }
369: mandoc_asprintf(&bufs, "%s_%d", buf, suffix);
370: slot = ohash_qlookup(&id_unique, bufs);
371: cp = ohash_find(&id_unique, slot);
372: }
373: free(buf);
374: buf = bufs;
375: }
376: ohash_insert(&id_unique, slot, buf);
1.210 schwarze 377: return buf;
1.88 kristaps 378: }
379:
1.85 kristaps 380: static int
1.197 schwarze 381: print_escape(struct html *h, char c)
1.159 schwarze 382: {
383:
384: switch (c) {
385: case '<':
1.197 schwarze 386: print_word(h, "<");
1.159 schwarze 387: break;
388: case '>':
1.197 schwarze 389: print_word(h, ">");
1.159 schwarze 390: break;
391: case '&':
1.197 schwarze 392: print_word(h, "&");
1.159 schwarze 393: break;
394: case '"':
1.197 schwarze 395: print_word(h, """);
1.159 schwarze 396: break;
397: case ASCII_NBRSP:
1.197 schwarze 398: print_word(h, " ");
1.159 schwarze 399: break;
400: case ASCII_HYPH:
1.197 schwarze 401: print_byte(h, '-');
1.189 schwarze 402: break;
1.159 schwarze 403: case ASCII_BREAK:
404: break;
405: default:
1.188 schwarze 406: return 0;
1.159 schwarze 407: }
1.188 schwarze 408: return 1;
1.159 schwarze 409: }
410:
411: static int
1.195 schwarze 412: print_encode(struct html *h, const char *p, const char *pend, int norecurse)
1.29 kristaps 413: {
1.197 schwarze 414: char numbuf[16];
1.214 schwarze 415: const char *seq;
1.77 kristaps 416: size_t sz;
1.214 schwarze 417: int c, len, breakline, nospace;
1.132 kristaps 418: enum mandoc_esc esc;
1.214 schwarze 419: static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
1.154 schwarze 420: ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
1.14 kristaps 421:
1.195 schwarze 422: if (pend == NULL)
423: pend = strchr(p, '\0');
424:
1.214 schwarze 425: breakline = 0;
1.85 kristaps 426: nospace = 0;
427:
1.195 schwarze 428: while (p < pend) {
1.151 schwarze 429: if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
430: h->flags &= ~HTML_SKIPCHAR;
431: p++;
432: continue;
433: }
434:
1.197 schwarze 435: for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
1.214 schwarze 436: print_byte(h, *p);
437:
438: if (breakline &&
439: (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
1.245 schwarze 440: print_otag(h, TAG_BR, "");
1.214 schwarze 441: breakline = 0;
442: while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
443: p++;
444: continue;
445: }
1.77 kristaps 446:
1.195 schwarze 447: if (p >= pend)
1.132 kristaps 448: break;
449:
1.214 schwarze 450: if (*p == ' ') {
451: print_endword(h);
452: p++;
453: continue;
454: }
455:
1.197 schwarze 456: if (print_escape(h, *p++))
1.154 schwarze 457: continue;
1.77 kristaps 458:
1.132 kristaps 459: esc = mandoc_escape(&p, &seq, &len);
460: switch (esc) {
1.156 schwarze 461: case ESCAPE_FONT:
462: case ESCAPE_FONTPREV:
463: case ESCAPE_FONTBOLD:
464: case ESCAPE_FONTITALIC:
465: case ESCAPE_FONTBI:
1.242 schwarze 466: case ESCAPE_FONTCW:
1.156 schwarze 467: case ESCAPE_FONTROMAN:
1.243 schwarze 468: if (0 == norecurse) {
469: h->flags |= HTML_NOSPACE;
1.255 schwarze 470: if (html_setfont(h, esc))
471: print_metaf(h);
1.243 schwarze 472: h->flags &= ~HTML_NOSPACE;
473: }
1.151 schwarze 474: continue;
1.156 schwarze 475: case ESCAPE_SKIPCHAR:
1.151 schwarze 476: h->flags |= HTML_SKIPCHAR;
477: continue;
1.246 schwarze 478: case ESCAPE_ERROR:
479: continue;
1.151 schwarze 480: default:
481: break;
482: }
483:
484: if (h->flags & HTML_SKIPCHAR) {
485: h->flags &= ~HTML_SKIPCHAR;
486: continue;
487: }
488:
489: switch (esc) {
1.156 schwarze 490: case ESCAPE_UNICODE:
1.159 schwarze 491: /* Skip past "u" header. */
1.144 kristaps 492: c = mchars_num2uc(seq + 1, len - 1);
493: break;
1.156 schwarze 494: case ESCAPE_NUMBERED:
1.141 kristaps 495: c = mchars_num2char(seq, len);
1.181 schwarze 496: if (c < 0)
497: continue;
1.82 kristaps 498: break;
1.156 schwarze 499: case ESCAPE_SPECIAL:
1.191 schwarze 500: c = mchars_spec2cp(seq, len);
1.181 schwarze 501: if (c <= 0)
502: continue;
1.246 schwarze 503: break;
504: case ESCAPE_UNDEF:
505: c = *seq;
1.132 kristaps 506: break;
1.239 schwarze 507: case ESCAPE_DEVICE:
508: print_word(h, "html");
509: continue;
1.214 schwarze 510: case ESCAPE_BREAK:
511: breakline = 1;
512: continue;
1.156 schwarze 513: case ESCAPE_NOSPACE:
1.132 kristaps 514: if ('\0' == *p)
515: nospace = 1;
1.179 schwarze 516: continue;
1.185 schwarze 517: case ESCAPE_OVERSTRIKE:
518: if (len == 0)
519: continue;
520: c = seq[len - 1];
521: break;
1.82 kristaps 522: default:
1.179 schwarze 523: continue;
1.82 kristaps 524: }
1.181 schwarze 525: if ((c < 0x20 && c != 0x09) ||
526: (c > 0x7E && c < 0xA0))
1.179 schwarze 527: c = 0xFFFD;
1.197 schwarze 528: if (c > 0x7E) {
1.216 schwarze 529: (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
1.197 schwarze 530: print_word(h, numbuf);
531: } else if (print_escape(h, c) == 0)
532: print_byte(h, c);
1.32 kristaps 533: }
1.85 kristaps 534:
1.188 schwarze 535: return nospace;
1.14 kristaps 536: }
537:
1.94 kristaps 538: static void
1.195 schwarze 539: print_href(struct html *h, const char *name, const char *sec, int man)
1.94 kristaps 540: {
1.240 schwarze 541: struct stat sb;
1.195 schwarze 542: const char *p, *pp;
1.240 schwarze 543: char *filename;
544:
545: if (man) {
546: pp = h->base_man1;
547: if (h->base_man2 != NULL) {
548: mandoc_asprintf(&filename, "%s.%s", name, sec);
549: if (stat(filename, &sb) == -1)
550: pp = h->base_man2;
551: free(filename);
552: }
553: } else
554: pp = h->base_includes;
1.195 schwarze 555:
556: while ((p = strchr(pp, '%')) != NULL) {
557: print_encode(h, pp, p, 1);
558: if (man && p[1] == 'S') {
559: if (sec == NULL)
1.197 schwarze 560: print_byte(h, '1');
1.195 schwarze 561: else
562: print_encode(h, sec, NULL, 1);
563: } else if ((man && p[1] == 'N') ||
564: (man == 0 && p[1] == 'I'))
565: print_encode(h, name, NULL, 1);
566: else
567: print_encode(h, p, p + 2, 1);
568: pp = p + 2;
569: }
570: if (*pp != '\0')
571: print_encode(h, pp, NULL, 1);
1.94 kristaps 572: }
573:
1.51 kristaps 574: struct tag *
1.194 schwarze 575: print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
1.14 kristaps 576: {
1.194 schwarze 577: va_list ap;
1.30 kristaps 578: struct tag *t;
1.195 schwarze 579: const char *attr;
1.203 schwarze 580: char *arg1, *arg2;
1.244 schwarze 581: int style_written, tflags;
1.196 schwarze 582:
583: tflags = htmltags[tag].flags;
1.30 kristaps 584:
1.257 schwarze 585: /* Flow content is not allowed in phrasing context. */
586:
587: if ((tflags & HTML_INPHRASE) == 0) {
588: for (t = h->tag; t != NULL; t = t->next) {
589: if (t->closed)
590: continue;
591: assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0);
592: break;
593: }
1.260 schwarze 594:
595: /*
596: * Always wrap phrasing elements in a paragraph
597: * unless already contained in some flow container;
598: * never put them directly into a section.
599: */
600:
601: } else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION)
602: print_otag(h, TAG_P, "c", "Pp");
1.257 schwarze 603:
1.204 schwarze 604: /* Push this tag onto the stack of open scopes. */
1.94 kristaps 605:
1.196 schwarze 606: if ((tflags & HTML_NOSTACK) == 0) {
1.128 kristaps 607: t = mandoc_malloc(sizeof(struct tag));
1.30 kristaps 608: t->tag = tag;
1.204 schwarze 609: t->next = h->tag;
1.252 schwarze 610: t->refcnt = 0;
611: t->closed = 0;
1.204 schwarze 612: h->tag = t;
1.30 kristaps 613: } else
614: t = NULL;
1.29 kristaps 615:
1.196 schwarze 616: if (tflags & HTML_NLBEFORE)
1.197 schwarze 617: print_endline(h);
618: if (h->col == 0)
619: print_indent(h);
1.196 schwarze 620: else if ((h->flags & HTML_NOSPACE) == 0) {
621: if (h->flags & HTML_KEEP)
1.216 schwarze 622: print_word(h, " ");
1.196 schwarze 623: else {
624: if (h->flags & HTML_PREKEEP)
625: h->flags |= HTML_KEEP;
1.197 schwarze 626: print_endword(h);
1.105 kristaps 627: }
1.196 schwarze 628: }
1.29 kristaps 629:
1.109 kristaps 630: if ( ! (h->flags & HTML_NONOSPACE))
631: h->flags &= ~HTML_NOSPACE;
1.110 kristaps 632: else
633: h->flags |= HTML_NOSPACE;
1.109 kristaps 634:
1.94 kristaps 635: /* Print out the tag name and attributes. */
636:
1.197 schwarze 637: print_byte(h, '<');
638: print_word(h, htmltags[tag].name);
1.194 schwarze 639:
640: va_start(ap, fmt);
641:
1.244 schwarze 642: while (*fmt != '\0' && *fmt != 's') {
1.203 schwarze 643:
1.238 schwarze 644: /* Parse attributes and arguments. */
1.203 schwarze 645:
646: arg1 = va_arg(ap, char *);
1.238 schwarze 647: arg2 = NULL;
1.194 schwarze 648: switch (*fmt++) {
649: case 'c':
1.195 schwarze 650: attr = "class";
1.194 schwarze 651: break;
652: case 'h':
1.195 schwarze 653: attr = "href";
1.194 schwarze 654: break;
655: case 'i':
1.195 schwarze 656: attr = "id";
1.194 schwarze 657: break;
658: case '?':
1.203 schwarze 659: attr = arg1;
660: arg1 = va_arg(ap, char *);
1.194 schwarze 661: break;
662: default:
663: abort();
664: }
1.203 schwarze 665: if (*fmt == 'M')
666: arg2 = va_arg(ap, char *);
667: if (arg1 == NULL)
668: continue;
669:
1.238 schwarze 670: /* Print the attributes. */
1.203 schwarze 671:
1.197 schwarze 672: print_byte(h, ' ');
673: print_word(h, attr);
674: print_byte(h, '=');
675: print_byte(h, '"');
1.195 schwarze 676: switch (*fmt) {
1.208 schwarze 677: case 'I':
678: print_href(h, arg1, NULL, 0);
679: fmt++;
680: break;
1.195 schwarze 681: case 'M':
1.203 schwarze 682: print_href(h, arg1, arg2, 1);
1.195 schwarze 683: fmt++;
684: break;
1.208 schwarze 685: case 'R':
686: print_byte(h, '#');
687: print_encode(h, arg1, NULL, 1);
1.195 schwarze 688: fmt++;
1.208 schwarze 689: break;
1.195 schwarze 690: default:
1.244 schwarze 691: print_encode(h, arg1, NULL, 1);
1.195 schwarze 692: break;
693: }
1.197 schwarze 694: print_byte(h, '"');
1.194 schwarze 695: }
1.244 schwarze 696:
697: style_written = 0;
698: while (*fmt++ == 's') {
699: arg1 = va_arg(ap, char *);
700: arg2 = va_arg(ap, char *);
701: if (arg2 == NULL)
702: continue;
703: print_byte(h, ' ');
704: if (style_written == 0) {
705: print_word(h, "style=\"");
706: style_written = 1;
707: }
708: print_word(h, arg1);
709: print_byte(h, ':');
710: print_byte(h, ' ');
711: print_word(h, arg2);
712: print_byte(h, ';');
713: }
714: if (style_written)
715: print_byte(h, '"');
716:
1.194 schwarze 717: va_end(ap);
1.94 kristaps 718:
1.172 kristaps 719: /* Accommodate for "well-formed" singleton escaping. */
1.94 kristaps 720:
1.257 schwarze 721: if (htmltags[tag].flags & HTML_NOSTACK)
1.197 schwarze 722: print_byte(h, '/');
1.93 kristaps 723:
1.197 schwarze 724: print_byte(h, '>');
1.14 kristaps 725:
1.196 schwarze 726: if (tflags & HTML_NLBEGIN)
1.197 schwarze 727: print_endline(h);
1.196 schwarze 728: else
729: h->flags |= HTML_NOSPACE;
1.117 kristaps 730:
1.196 schwarze 731: if (tflags & HTML_INDENT)
732: h->indent++;
733: if (tflags & HTML_NOINDENT)
734: h->noindent++;
1.117 kristaps 735:
1.188 schwarze 736: return t;
1.14 kristaps 737: }
738:
1.29 kristaps 739: static void
1.184 schwarze 740: print_ctag(struct html *h, struct tag *tag)
1.14 kristaps 741: {
1.196 schwarze 742: int tflags;
1.156 schwarze 743:
1.252 schwarze 744: if (tag->closed == 0) {
745: tag->closed = 1;
746: if (tag == h->metaf)
747: h->metaf = NULL;
748: if (tag == h->tblt)
749: h->tblt = NULL;
750:
751: tflags = htmltags[tag->tag].flags;
752: if (tflags & HTML_INDENT)
753: h->indent--;
754: if (tflags & HTML_NOINDENT)
755: h->noindent--;
756: if (tflags & HTML_NLEND)
757: print_endline(h);
758: print_indent(h);
759: print_byte(h, '<');
760: print_byte(h, '/');
761: print_word(h, htmltags[tag->tag].name);
762: print_byte(h, '>');
763: if (tflags & HTML_NLAFTER)
764: print_endline(h);
765: }
766: if (tag->refcnt == 0) {
767: h->tag = tag->next;
768: free(tag);
769: }
1.14 kristaps 770: }
771:
/*
 * Print the document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char	 doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
778:
779: void
780: print_gen_comment(struct html *h, struct roff_node *n)
781: {
782: int wantblank;
783:
784: print_word(h, "<!-- This is an automatically generated file."
785: " Do not edit.");
786: h->indent = 1;
787: wantblank = 0;
788: while (n != NULL && n->type == ROFFT_COMMENT) {
789: if (strstr(n->string, "-->") == NULL &&
790: (wantblank || *n->string != '\0')) {
791: print_endline(h);
792: print_indent(h);
793: print_word(h, n->string);
794: wantblank = *n->string != '\0';
795: }
796: n = n->next;
797: }
798: if (wantblank)
799: print_endline(h);
800: print_word(h, " -->");
801: print_endline(h);
802: h->indent = 0;
1.1 kristaps 803: }
804:
1.51 kristaps 805: void
1.104 kristaps 806: print_text(struct html *h, const char *word)
1.1 kristaps 807: {
1.260 schwarze 808: /*
809: * Always wrap text in a paragraph unless already contained in
810: * some flow container; never put it directly into a section.
811: */
812:
813: if (h->tag->tag == TAG_SECTION)
814: print_otag(h, TAG_P, "c", "Pp");
815:
816: /* Output whitespace before this text? */
817:
1.197 schwarze 818: if (h->col && (h->flags & HTML_NOSPACE) == 0) {
1.105 kristaps 819: if ( ! (HTML_KEEP & h->flags)) {
820: if (HTML_PREKEEP & h->flags)
821: h->flags |= HTML_KEEP;
1.197 schwarze 822: print_endword(h);
1.105 kristaps 823: } else
1.216 schwarze 824: print_word(h, " ");
1.105 kristaps 825: }
1.260 schwarze 826:
827: /*
828: * Print the text, optionally surrounded by HTML whitespace,
829: * optionally manually switching fonts before and after.
830: */
1.30 kristaps 831:
1.255 schwarze 832: assert(h->metaf == NULL);
833: print_metaf(h);
834: print_indent(h);
1.195 schwarze 835: if ( ! print_encode(h, word, NULL, 0)) {
1.109 kristaps 836: if ( ! (h->flags & HTML_NONOSPACE))
837: h->flags &= ~HTML_NOSPACE;
1.183 schwarze 838: h->flags &= ~HTML_NONEWLINE;
1.149 kristaps 839: } else
1.183 schwarze 840: h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
1.122 kristaps 841:
1.255 schwarze 842: if (h->metaf != NULL) {
1.122 kristaps 843: print_tagq(h, h->metaf);
844: h->metaf = NULL;
845: }
1.113 schwarze 846:
847: h->flags &= ~HTML_IGNDELIM;
1.1 kristaps 848: }
1.30 kristaps 849:
1.51 kristaps 850: void
1.30 kristaps 851: print_tagq(struct html *h, const struct tag *until)
852: {
1.252 schwarze 853: struct tag *this, *next;
1.30 kristaps 854:
1.252 schwarze 855: for (this = h->tag; this != NULL; this = next) {
856: next = this == until ? NULL : this->next;
857: print_ctag(h, this);
1.30 kristaps 858: }
859: }
860:
1.250 schwarze 861: /*
862: * Close out all open elements up to but excluding suntil.
863: * Note that a paragraph just inside stays open together with it
864: * because paragraphs include subsequent phrasing content.
865: */
1.51 kristaps 866: void
1.30 kristaps 867: print_stagq(struct html *h, const struct tag *suntil)
868: {
1.252 schwarze 869: struct tag *this, *next;
1.30 kristaps 870:
1.252 schwarze 871: for (this = h->tag; this != NULL; this = next) {
872: next = this->next;
873: if (this == suntil || (next == suntil &&
874: (this->tag == TAG_P || this->tag == TAG_PRE)))
875: break;
876: print_ctag(h, this);
1.30 kristaps 877: }
1.171 kristaps 878: }
879:
1.197 schwarze 880:
881: /***********************************************************************
882: * Low level output functions.
883: * They implement line breaking using a short static buffer.
884: ***********************************************************************/
885:
886: /*
887: * Buffer one HTML output byte.
888: * If the buffer is full, flush and deactivate it and start a new line.
889: * If the buffer is inactive, print directly.
890: */
891: static void
892: print_byte(struct html *h, char c)
893: {
894: if ((h->flags & HTML_BUFFER) == 0) {
895: putchar(c);
896: h->col++;
897: return;
898: }
899:
900: if (h->col + h->bufcol < sizeof(h->buf)) {
901: h->buf[h->bufcol++] = c;
902: return;
903: }
904:
905: putchar('\n');
906: h->col = 0;
907: print_indent(h);
908: putchar(' ');
909: putchar(' ');
910: fwrite(h->buf, h->bufcol, 1, stdout);
911: putchar(c);
912: h->col = (h->indent + 1) * 2 + h->bufcol + 1;
913: h->bufcol = 0;
914: h->flags &= ~HTML_BUFFER;
915: }
916:
1.196 schwarze 917: /*
918: * If something was printed on the current output line, end it.
1.197 schwarze 919: * Not to be called right after print_indent().
1.196 schwarze 920: */
1.202 schwarze 921: void
1.197 schwarze 922: print_endline(struct html *h)
1.196 schwarze 923: {
1.197 schwarze 924: if (h->col == 0)
1.196 schwarze 925: return;
926:
1.197 schwarze 927: if (h->bufcol) {
928: putchar(' ');
929: fwrite(h->buf, h->bufcol, 1, stdout);
930: h->bufcol = 0;
931: }
1.196 schwarze 932: putchar('\n');
1.197 schwarze 933: h->col = 0;
934: h->flags |= HTML_NOSPACE;
935: h->flags &= ~HTML_BUFFER;
936: }
937:
938: /*
939: * Flush the HTML output buffer.
940: * If it is inactive, activate it.
941: */
942: static void
943: print_endword(struct html *h)
944: {
945: if (h->noindent) {
946: print_byte(h, ' ');
947: return;
948: }
949:
950: if ((h->flags & HTML_BUFFER) == 0) {
951: h->col++;
952: h->flags |= HTML_BUFFER;
953: } else if (h->bufcol) {
954: putchar(' ');
955: fwrite(h->buf, h->bufcol, 1, stdout);
956: h->col += h->bufcol + 1;
957: }
958: h->bufcol = 0;
1.196 schwarze 959: }
960:
961: /*
962: * If at the beginning of a new output line,
963: * perform indentation and mark the line as containing output.
964: * Make sure to really produce some output right afterwards,
965: * but do not use print_otag() for producing it.
966: */
967: static void
1.197 schwarze 968: print_indent(struct html *h)
1.196 schwarze 969: {
1.197 schwarze 970: size_t i;
1.196 schwarze 971:
1.261 schwarze 972: if (h->col || h->noindent)
1.196 schwarze 973: return;
974:
1.261 schwarze 975: h->col = h->indent * 2;
976: for (i = 0; i < h->col; i++)
977: putchar(' ');
1.197 schwarze 978: }
979:
/*
 * Pass a NUL-terminated string to print_byte() one byte at a time,
 * such that it is printed or buffered depending on the current
 * state of the HTML output buffer.  No escaping is performed.
 */
static void
print_word(struct html *h, const char *cp)
{
	for (; *cp != '\0'; cp++)
		print_byte(h, *cp);
}
CVSweb