Annotation of mandoc/html.c, Revision 1.276
1.276 ! schwarze 1: /* $Id: html.c,v 1.275 2021/09/09 14:47:24 schwarze Exp $ */
1.1 kristaps 2: /*
1.176 schwarze 3: * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
1.274 schwarze 4: * Copyright (c) 2011-2015, 2017-2021 Ingo Schwarze <schwarze@openbsd.org>
1.1 kristaps 5: *
6: * Permission to use, copy, modify, and distribute this software for any
1.29 kristaps 7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
1.1 kristaps 9: *
1.186 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.29 kristaps 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.186 schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.29 kristaps 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.264 schwarze 17: *
18: * Common functions for mandoc(1) HTML formatters.
19: * For use by individual formatters and by the main program.
1.1 kristaps 20: */
1.92 kristaps 21: #include "config.h"
22:
1.41 kristaps 23: #include <sys/types.h>
1.240 schwarze 24: #include <sys/stat.h>
1.30 kristaps 25:
1.1 kristaps 26: #include <assert.h>
1.68 kristaps 27: #include <ctype.h>
1.76 kristaps 28: #include <stdarg.h>
1.229 schwarze 29: #include <stddef.h>
1.29 kristaps 30: #include <stdio.h>
1.63 kristaps 31: #include <stdint.h>
1.1 kristaps 32: #include <stdlib.h>
1.33 kristaps 33: #include <string.h>
1.45 kristaps 34: #include <unistd.h>
1.1 kristaps 35:
1.210 schwarze 36: #include "mandoc_aux.h"
1.229 schwarze 37: #include "mandoc_ohash.h"
1.100 kristaps 38: #include "mandoc.h"
1.210 schwarze 39: #include "roff.h"
1.58 kristaps 40: #include "out.h"
1.51 kristaps 41: #include "html.h"
1.186 schwarze 42: #include "manconf.h"
1.64 kristaps 43: #include "main.h"
1.63 kristaps 44:
/*
 * Bit flags describing how an HTML element is formatted and where
 * it may occur.  They are combined in the "flags" member of
 * struct htmldata.
 */
#define	HTML_INPHRASE	 (1 << 0)  /* Can appear in phrasing context. */
#define	HTML_TOPHRASE	 (1 << 1)  /* Establishes phrasing context. */
#define	HTML_NOSTACK	 (1 << 2)  /* Does not have an end tag. */
#define	HTML_NLBEFORE	 (1 << 3)  /* Output line break before opening. */
#define	HTML_NLBEGIN	 (1 << 4)  /* Output line break after opening. */
#define	HTML_NLEND	 (1 << 5)  /* Output line break before closing. */
#define	HTML_NLAFTER	 (1 << 6)  /* Output line break after closing. */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)
#define	HTML_INDENT	 (1 << 7)  /* Indent content by two spaces. */
#define	HTML_NOINDENT	 (1 << 8)  /* Exception: never indent content. */

/* Static description of one HTML element type. */
struct	htmldata {
	const char	 *name;   /* Element name printed inside the tag. */
	int		  flags;  /* Bitwise OR of the HTML_* flags above. */
};
1.7 kristaps 61:
1.29 kristaps 62: static const struct htmldata htmltags[TAG_MAX] = {
1.196 schwarze 63: {"html", HTML_NLALL},
64: {"head", HTML_NLALL | HTML_INDENT},
1.257 schwarze 65: {"meta", HTML_NOSTACK | HTML_NLALL},
66: {"link", HTML_NOSTACK | HTML_NLALL},
67: {"style", HTML_NLALL | HTML_INDENT},
68: {"title", HTML_NLAROUND},
1.196 schwarze 69: {"body", HTML_NLALL},
70: {"div", HTML_NLAROUND},
1.253 schwarze 71: {"section", HTML_NLALL},
1.276 ! schwarze 72: {"nav", HTML_NLALL},
1.196 schwarze 73: {"table", HTML_NLALL | HTML_INDENT},
74: {"tr", HTML_NLALL | HTML_INDENT},
75: {"td", HTML_NLAROUND},
76: {"li", HTML_NLAROUND | HTML_INDENT},
77: {"ul", HTML_NLALL | HTML_INDENT},
78: {"ol", HTML_NLALL | HTML_INDENT},
79: {"dl", HTML_NLALL | HTML_INDENT},
80: {"dt", HTML_NLAROUND},
81: {"dd", HTML_NLAROUND | HTML_INDENT},
1.257 schwarze 82: {"h1", HTML_TOPHRASE | HTML_NLAROUND},
83: {"h2", HTML_TOPHRASE | HTML_NLAROUND},
84: {"p", HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT},
1.271 schwarze 85: {"pre", HTML_TOPHRASE | HTML_NLAROUND | HTML_NOINDENT},
1.257 schwarze 86: {"a", HTML_INPHRASE | HTML_TOPHRASE},
87: {"b", HTML_INPHRASE | HTML_TOPHRASE},
88: {"cite", HTML_INPHRASE | HTML_TOPHRASE},
89: {"code", HTML_INPHRASE | HTML_TOPHRASE},
90: {"i", HTML_INPHRASE | HTML_TOPHRASE},
91: {"small", HTML_INPHRASE | HTML_TOPHRASE},
92: {"span", HTML_INPHRASE | HTML_TOPHRASE},
93: {"var", HTML_INPHRASE | HTML_TOPHRASE},
94: {"br", HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL},
1.275 schwarze 95: {"hr", HTML_INPHRASE | HTML_NOSTACK},
1.263 schwarze 96: {"mark", HTML_INPHRASE },
1.257 schwarze 97: {"math", HTML_INPHRASE | HTML_NLALL | HTML_INDENT},
1.196 schwarze 98: {"mrow", 0},
99: {"mi", 0},
1.215 schwarze 100: {"mn", 0},
1.196 schwarze 101: {"mo", 0},
102: {"msup", 0},
103: {"msub", 0},
104: {"msubsup", 0},
105: {"mfrac", 0},
106: {"msqrt", 0},
107: {"mfenced", 0},
108: {"mtable", 0},
109: {"mtr", 0},
110: {"mtd", 0},
111: {"munderover", 0},
112: {"munder", 0},
113: {"mover", 0},
1.90 kristaps 114: };
115:
1.229 schwarze 116: /* Avoid duplicate HTML id= attributes. */
1.269 schwarze 117:
118: struct id_entry {
119: int ord; /* Ordinal number of the latest occurrence. */
120: char id[]; /* The id= attribute without any ordinal suffix. */
121: };
1.229 schwarze 122: static struct ohash id_unique;
123:
/* State management. */
static	void	 html_reset_internal(struct html *h);

/* Low level output functions. */
static	void	 print_byte(struct html *h, char c);
static	void	 print_endword(struct html *h);
static	void	 print_indent(struct html *h);
static	void	 print_word(struct html *h, const char *cp);

/* Tags, escaping and fonts. */
static	void	 print_ctag(struct html *h, struct tag *tag);
static	int	 print_escape(struct html *h, char c);
static	int	 print_encode(struct html *h, const char *p,
			const char *pend, int norecurse);
static	void	 print_href(struct html *h, const char *name,
			const char *sec, int man);
static	void	 print_metaf(struct html *h);
1.82 kristaps 135:
1.156 schwarze 136:
1.180 schwarze 137: void *
1.191 schwarze 138: html_alloc(const struct manoutput *outopts)
1.10 kristaps 139: {
1.30 kristaps 140: struct html *h;
141:
1.128 kristaps 142: h = mandoc_calloc(1, sizeof(struct html));
1.10 kristaps 143:
1.204 schwarze 144: h->tag = NULL;
1.272 schwarze 145: h->metac = h->metal = ESCAPE_FONTROMAN;
1.186 schwarze 146: h->style = outopts->style;
1.240 schwarze 147: if ((h->base_man1 = outopts->man) == NULL)
148: h->base_man2 = NULL;
149: else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
150: *h->base_man2++ = '\0';
1.186 schwarze 151: h->base_includes = outopts->includes;
152: if (outopts->fragment)
153: h->oflags |= HTML_FRAGMENT;
1.241 schwarze 154: if (outopts->toc)
155: h->oflags |= HTML_TOC;
1.43 kristaps 156:
1.269 schwarze 157: mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
1.229 schwarze 158:
1.188 schwarze 159: return h;
1.29 kristaps 160: }
1.10 kristaps 161:
1.254 schwarze 162: static void
163: html_reset_internal(struct html *h)
1.29 kristaps 164: {
1.30 kristaps 165: struct tag *tag;
1.269 schwarze 166: struct id_entry *entry;
1.229 schwarze 167: unsigned int slot;
1.30 kristaps 168:
1.204 schwarze 169: while ((tag = h->tag) != NULL) {
170: h->tag = tag->next;
1.30 kristaps 171: free(tag);
172: }
1.269 schwarze 173: entry = ohash_first(&id_unique, &slot);
174: while (entry != NULL) {
175: free(entry);
176: entry = ohash_next(&id_unique, &slot);
1.229 schwarze 177: }
178: ohash_delete(&id_unique);
1.254 schwarze 179: }
180:
181: void
182: html_reset(void *p)
183: {
184: html_reset_internal(p);
1.269 schwarze 185: mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
1.254 schwarze 186: }
187:
/*
 * Discard all per-document state of the formatter "p",
 * then free the formatter itself.
 */
void
html_free(void *p)
{
	struct html	*h;

	h = p;
	html_reset_internal(h);
	free(h);
}
1.2 kristaps 194:
1.51 kristaps 195: void
1.29 kristaps 196: print_gen_head(struct html *h)
197: {
1.165 kristaps 198: struct tag *t;
1.41 kristaps 199:
1.194 schwarze 200: print_otag(h, TAG_META, "?", "charset", "utf-8");
1.273 schwarze 201: print_otag(h, TAG_META, "??", "name", "viewport",
202: "content", "width=device-width, initial-scale=1.0");
1.222 schwarze 203: if (h->style != NULL) {
204: print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
205: h->style, "type", "text/css", "media", "all");
206: return;
207: }
1.165 kristaps 208:
1.168 kristaps 209: /*
1.222 schwarze 210: * Print a minimal embedded style sheet.
1.168 kristaps 211: */
1.196 schwarze 212:
1.194 schwarze 213: t = print_otag(h, TAG_STYLE, "");
1.196 schwarze 214: print_text(h, "table.head, table.foot { width: 100%; }");
1.197 schwarze 215: print_endline(h);
1.196 schwarze 216: print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
1.197 schwarze 217: print_endline(h);
1.196 schwarze 218: print_text(h, "td.head-vol { text-align: center; }");
1.197 schwarze 219: print_endline(h);
1.256 schwarze 220: print_text(h, ".Nd, .Bf, .Op { display: inline; }");
1.225 schwarze 221: print_endline(h);
1.256 schwarze 222: print_text(h, ".Pa, .Ad { font-style: italic; }");
1.226 schwarze 223: print_endline(h);
1.256 schwarze 224: print_text(h, ".Ms { font-weight: bold; }");
1.228 schwarze 225: print_endline(h);
1.256 schwarze 226: print_text(h, ".Bl-diag ");
1.224 schwarze 227: print_byte(h, '>');
228: print_text(h, " dt { font-weight: bold; }");
1.223 schwarze 229: print_endline(h);
1.256 schwarze 230: print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd "
231: "{ font-weight: bold; font-family: inherit; }");
1.165 kristaps 232: print_tagq(h, t);
1.4 kristaps 233: }
234:
1.255 schwarze 235: int
236: html_setfont(struct html *h, enum mandoc_esc font)
1.88 kristaps 237: {
1.255 schwarze 238: switch (font) {
1.156 schwarze 239: case ESCAPE_FONTPREV:
1.90 kristaps 240: font = h->metal;
1.88 kristaps 241: break;
1.156 schwarze 242: case ESCAPE_FONTITALIC:
243: case ESCAPE_FONTBOLD:
244: case ESCAPE_FONTBI:
1.255 schwarze 245: case ESCAPE_FONTROMAN:
1.274 schwarze 246: case ESCAPE_FONTCR:
247: case ESCAPE_FONTCB:
248: case ESCAPE_FONTCI:
1.242 schwarze 249: break;
1.156 schwarze 250: case ESCAPE_FONT:
1.255 schwarze 251: font = ESCAPE_FONTROMAN;
1.88 kristaps 252: break;
253: default:
1.255 schwarze 254: return 0;
1.88 kristaps 255: }
1.255 schwarze 256: h->metal = h->metac;
257: h->metac = font;
258: return 1;
259: }
1.88 kristaps 260:
1.255 schwarze 261: static void
262: print_metaf(struct html *h)
263: {
1.122 kristaps 264: if (h->metaf) {
265: print_tagq(h, h->metaf);
266: h->metaf = NULL;
267: }
1.255 schwarze 268: switch (h->metac) {
269: case ESCAPE_FONTITALIC:
1.194 schwarze 270: h->metaf = print_otag(h, TAG_I, "");
1.152 schwarze 271: break;
1.255 schwarze 272: case ESCAPE_FONTBOLD:
1.194 schwarze 273: h->metaf = print_otag(h, TAG_B, "");
1.152 schwarze 274: break;
1.255 schwarze 275: case ESCAPE_FONTBI:
1.194 schwarze 276: h->metaf = print_otag(h, TAG_B, "");
277: print_otag(h, TAG_I, "");
1.152 schwarze 278: break;
1.274 schwarze 279: case ESCAPE_FONTCR:
1.242 schwarze 280: h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
281: break;
1.274 schwarze 282: case ESCAPE_FONTCB:
283: h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
284: print_otag(h, TAG_B, "");
285: break;
286: case ESCAPE_FONTCI:
287: h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
288: print_otag(h, TAG_I, "");
289: break;
1.152 schwarze 290: default:
291: break;
292: }
1.248 schwarze 293: }
294:
1.249 schwarze 295: void
296: html_close_paragraph(struct html *h)
297: {
1.259 schwarze 298: struct tag *this, *next;
299: int flags;
1.249 schwarze 300:
1.259 schwarze 301: this = h->tag;
302: for (;;) {
303: next = this->next;
304: flags = htmltags[this->tag].flags;
305: if (flags & (HTML_INPHRASE | HTML_TOPHRASE))
306: print_ctag(h, this);
307: if ((flags & HTML_INPHRASE) == 0)
1.249 schwarze 308: break;
1.259 schwarze 309: this = next;
1.249 schwarze 310: }
311: }
312:
1.248 schwarze 313: /*
314: * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
315: * TOKEN_NONE does not switch. The old mode is returned.
316: */
317: enum roff_tok
318: html_fillmode(struct html *h, enum roff_tok want)
319: {
320: struct tag *t;
321: enum roff_tok had;
322:
323: for (t = h->tag; t != NULL; t = t->next)
324: if (t->tag == TAG_PRE)
325: break;
326:
327: had = t == NULL ? ROFF_fi : ROFF_nf;
328:
329: if (want != had) {
330: switch (want) {
331: case ROFF_fi:
332: print_tagq(h, t);
333: break;
334: case ROFF_nf:
1.249 schwarze 335: html_close_paragraph(h);
1.248 schwarze 336: print_otag(h, TAG_PRE, "");
337: break;
338: case TOKEN_NONE:
339: break;
340: default:
341: abort();
342: }
343: }
344: return had;
1.210 schwarze 345: }
346:
1.264 schwarze 347: /*
348: * Allocate a string to be used for the "id=" attribute of an HTML
349: * element and/or as a segment identifier for a URI in an <a> element.
350: * The function may fail and return NULL if the node lacks text data
351: * to create the attribute from.
1.269 schwarze 352: * The caller is responsible for free(3)ing the returned string.
353: *
1.264 schwarze 354: * If the "unique" argument is non-zero, the "id_unique" ohash table
1.269 schwarze 355: * is used for de-duplication. If the "unique" argument is 1,
356: * it is the first time the function is called for this tag and
357: * location, so if an ordinal suffix is needed, it is incremented.
358: * If the "unique" argument is 2, it is the second time the function
359: * is called for this tag and location, so the ordinal suffix
360: * remains unchanged.
1.264 schwarze 361: */
1.210 schwarze 362: char *
1.229 schwarze 363: html_make_id(const struct roff_node *n, int unique)
1.210 schwarze 364: {
365: const struct roff_node *nch;
1.269 schwarze 366: struct id_entry *entry;
367: char *buf, *cp;
368: size_t len;
1.229 schwarze 369: unsigned int slot;
1.210 schwarze 370:
1.267 schwarze 371: if (n->tag != NULL)
372: buf = mandoc_strdup(n->tag);
1.264 schwarze 373: else {
374: switch (n->tok) {
375: case MDOC_Sh:
376: case MDOC_Ss:
377: case MDOC_Sx:
378: case MAN_SH:
379: case MAN_SS:
380: for (nch = n->child; nch != NULL; nch = nch->next)
381: if (nch->type != ROFFT_TEXT)
382: return NULL;
383: buf = NULL;
384: deroff(&buf, n);
385: if (buf == NULL)
386: return NULL;
387: break;
388: default:
1.265 schwarze 389: if (n->child == NULL || n->child->type != ROFFT_TEXT)
1.264 schwarze 390: return NULL;
391: buf = mandoc_strdup(n->child->string);
392: break;
393: }
394: }
1.210 schwarze 395:
1.230 schwarze 396: /*
397: * In ID attributes, only use ASCII characters that are
398: * permitted in URL-fragment strings according to the
399: * explicit list at:
400: * https://url.spec.whatwg.org/#url-fragment-string
1.270 schwarze 401: * In addition, reserve '~' for ordinal suffixes.
1.230 schwarze 402: */
1.210 schwarze 403:
404: for (cp = buf; *cp != '\0'; cp++)
1.230 schwarze 405: if (isalnum((unsigned char)*cp) == 0 &&
1.270 schwarze 406: strchr("!$&'()*+,-./:;=?@_", *cp) == NULL)
1.210 schwarze 407: *cp = '_';
408:
1.229 schwarze 409: if (unique == 0)
410: return buf;
411:
412: /* Avoid duplicate HTML id= attributes. */
413:
414: slot = ohash_qlookup(&id_unique, buf);
1.269 schwarze 415: if ((entry = ohash_find(&id_unique, slot)) == NULL) {
416: len = strlen(buf) + 1;
417: entry = mandoc_malloc(sizeof(*entry) + len);
418: entry->ord = 1;
419: memcpy(entry->id, buf, len);
420: ohash_insert(&id_unique, slot, entry);
421: } else if (unique == 1)
422: entry->ord++;
423:
424: if (entry->ord > 1) {
425: cp = buf;
1.270 schwarze 426: mandoc_asprintf(&buf, "%s~%d", cp, entry->ord);
1.269 schwarze 427: free(cp);
1.229 schwarze 428: }
1.210 schwarze 429: return buf;
1.88 kristaps 430: }
431:
1.85 kristaps 432: static int
1.197 schwarze 433: print_escape(struct html *h, char c)
1.159 schwarze 434: {
435:
436: switch (c) {
437: case '<':
1.197 schwarze 438: print_word(h, "<");
1.159 schwarze 439: break;
440: case '>':
1.197 schwarze 441: print_word(h, ">");
1.159 schwarze 442: break;
443: case '&':
1.197 schwarze 444: print_word(h, "&");
1.159 schwarze 445: break;
446: case '"':
1.197 schwarze 447: print_word(h, """);
1.159 schwarze 448: break;
449: case ASCII_NBRSP:
1.197 schwarze 450: print_word(h, " ");
1.159 schwarze 451: break;
452: case ASCII_HYPH:
1.197 schwarze 453: print_byte(h, '-');
1.189 schwarze 454: break;
1.159 schwarze 455: case ASCII_BREAK:
456: break;
457: default:
1.188 schwarze 458: return 0;
1.159 schwarze 459: }
1.188 schwarze 460: return 1;
1.159 schwarze 461: }
462:
463: static int
1.195 schwarze 464: print_encode(struct html *h, const char *p, const char *pend, int norecurse)
1.29 kristaps 465: {
1.197 schwarze 466: char numbuf[16];
1.214 schwarze 467: const char *seq;
1.77 kristaps 468: size_t sz;
1.214 schwarze 469: int c, len, breakline, nospace;
1.132 kristaps 470: enum mandoc_esc esc;
1.214 schwarze 471: static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
1.154 schwarze 472: ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
1.14 kristaps 473:
1.195 schwarze 474: if (pend == NULL)
475: pend = strchr(p, '\0');
476:
1.214 schwarze 477: breakline = 0;
1.85 kristaps 478: nospace = 0;
479:
1.195 schwarze 480: while (p < pend) {
1.151 schwarze 481: if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
482: h->flags &= ~HTML_SKIPCHAR;
483: p++;
484: continue;
485: }
486:
1.197 schwarze 487: for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
1.214 schwarze 488: print_byte(h, *p);
489:
490: if (breakline &&
491: (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
1.245 schwarze 492: print_otag(h, TAG_BR, "");
1.214 schwarze 493: breakline = 0;
494: while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
495: p++;
496: continue;
497: }
1.77 kristaps 498:
1.195 schwarze 499: if (p >= pend)
1.132 kristaps 500: break;
501:
1.214 schwarze 502: if (*p == ' ') {
503: print_endword(h);
504: p++;
505: continue;
506: }
507:
1.197 schwarze 508: if (print_escape(h, *p++))
1.154 schwarze 509: continue;
1.77 kristaps 510:
1.132 kristaps 511: esc = mandoc_escape(&p, &seq, &len);
512: switch (esc) {
1.156 schwarze 513: case ESCAPE_FONT:
514: case ESCAPE_FONTPREV:
515: case ESCAPE_FONTBOLD:
516: case ESCAPE_FONTITALIC:
517: case ESCAPE_FONTBI:
518: case ESCAPE_FONTROMAN:
1.274 schwarze 519: case ESCAPE_FONTCR:
520: case ESCAPE_FONTCB:
521: case ESCAPE_FONTCI:
1.243 schwarze 522: if (0 == norecurse) {
523: h->flags |= HTML_NOSPACE;
1.255 schwarze 524: if (html_setfont(h, esc))
525: print_metaf(h);
1.243 schwarze 526: h->flags &= ~HTML_NOSPACE;
527: }
1.151 schwarze 528: continue;
1.156 schwarze 529: case ESCAPE_SKIPCHAR:
1.151 schwarze 530: h->flags |= HTML_SKIPCHAR;
531: continue;
1.246 schwarze 532: case ESCAPE_ERROR:
533: continue;
1.151 schwarze 534: default:
535: break;
536: }
537:
538: if (h->flags & HTML_SKIPCHAR) {
539: h->flags &= ~HTML_SKIPCHAR;
540: continue;
541: }
542:
543: switch (esc) {
1.156 schwarze 544: case ESCAPE_UNICODE:
1.159 schwarze 545: /* Skip past "u" header. */
1.144 kristaps 546: c = mchars_num2uc(seq + 1, len - 1);
547: break;
1.156 schwarze 548: case ESCAPE_NUMBERED:
1.141 kristaps 549: c = mchars_num2char(seq, len);
1.181 schwarze 550: if (c < 0)
551: continue;
1.82 kristaps 552: break;
1.156 schwarze 553: case ESCAPE_SPECIAL:
1.191 schwarze 554: c = mchars_spec2cp(seq, len);
1.181 schwarze 555: if (c <= 0)
556: continue;
1.246 schwarze 557: break;
558: case ESCAPE_UNDEF:
559: c = *seq;
1.132 kristaps 560: break;
1.239 schwarze 561: case ESCAPE_DEVICE:
562: print_word(h, "html");
563: continue;
1.214 schwarze 564: case ESCAPE_BREAK:
565: breakline = 1;
566: continue;
1.156 schwarze 567: case ESCAPE_NOSPACE:
1.132 kristaps 568: if ('\0' == *p)
569: nospace = 1;
1.179 schwarze 570: continue;
1.185 schwarze 571: case ESCAPE_OVERSTRIKE:
572: if (len == 0)
573: continue;
574: c = seq[len - 1];
575: break;
1.82 kristaps 576: default:
1.179 schwarze 577: continue;
1.82 kristaps 578: }
1.181 schwarze 579: if ((c < 0x20 && c != 0x09) ||
580: (c > 0x7E && c < 0xA0))
1.179 schwarze 581: c = 0xFFFD;
1.197 schwarze 582: if (c > 0x7E) {
1.216 schwarze 583: (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
1.197 schwarze 584: print_word(h, numbuf);
585: } else if (print_escape(h, c) == 0)
586: print_byte(h, c);
1.32 kristaps 587: }
1.85 kristaps 588:
1.188 schwarze 589: return nospace;
1.14 kristaps 590: }
591:
1.94 kristaps 592: static void
1.195 schwarze 593: print_href(struct html *h, const char *name, const char *sec, int man)
1.94 kristaps 594: {
1.240 schwarze 595: struct stat sb;
1.195 schwarze 596: const char *p, *pp;
1.240 schwarze 597: char *filename;
598:
599: if (man) {
600: pp = h->base_man1;
601: if (h->base_man2 != NULL) {
602: mandoc_asprintf(&filename, "%s.%s", name, sec);
603: if (stat(filename, &sb) == -1)
604: pp = h->base_man2;
605: free(filename);
606: }
607: } else
608: pp = h->base_includes;
1.195 schwarze 609:
610: while ((p = strchr(pp, '%')) != NULL) {
611: print_encode(h, pp, p, 1);
612: if (man && p[1] == 'S') {
613: if (sec == NULL)
1.197 schwarze 614: print_byte(h, '1');
1.195 schwarze 615: else
616: print_encode(h, sec, NULL, 1);
617: } else if ((man && p[1] == 'N') ||
618: (man == 0 && p[1] == 'I'))
619: print_encode(h, name, NULL, 1);
620: else
621: print_encode(h, p, p + 2, 1);
622: pp = p + 2;
623: }
624: if (*pp != '\0')
625: print_encode(h, pp, NULL, 1);
1.94 kristaps 626: }
627:
1.51 kristaps 628: struct tag *
1.194 schwarze 629: print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
1.14 kristaps 630: {
1.194 schwarze 631: va_list ap;
1.30 kristaps 632: struct tag *t;
1.195 schwarze 633: const char *attr;
1.203 schwarze 634: char *arg1, *arg2;
1.244 schwarze 635: int style_written, tflags;
1.196 schwarze 636:
637: tflags = htmltags[tag].flags;
1.30 kristaps 638:
1.257 schwarze 639: /* Flow content is not allowed in phrasing context. */
640:
641: if ((tflags & HTML_INPHRASE) == 0) {
642: for (t = h->tag; t != NULL; t = t->next) {
643: if (t->closed)
644: continue;
645: assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0);
646: break;
647: }
1.260 schwarze 648:
649: /*
650: * Always wrap phrasing elements in a paragraph
651: * unless already contained in some flow container;
652: * never put them directly into a section.
653: */
654:
655: } else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION)
656: print_otag(h, TAG_P, "c", "Pp");
1.257 schwarze 657:
1.204 schwarze 658: /* Push this tag onto the stack of open scopes. */
1.94 kristaps 659:
1.196 schwarze 660: if ((tflags & HTML_NOSTACK) == 0) {
1.128 kristaps 661: t = mandoc_malloc(sizeof(struct tag));
1.30 kristaps 662: t->tag = tag;
1.204 schwarze 663: t->next = h->tag;
1.252 schwarze 664: t->refcnt = 0;
665: t->closed = 0;
1.204 schwarze 666: h->tag = t;
1.30 kristaps 667: } else
668: t = NULL;
1.29 kristaps 669:
1.196 schwarze 670: if (tflags & HTML_NLBEFORE)
1.197 schwarze 671: print_endline(h);
672: if (h->col == 0)
673: print_indent(h);
1.196 schwarze 674: else if ((h->flags & HTML_NOSPACE) == 0) {
675: if (h->flags & HTML_KEEP)
1.216 schwarze 676: print_word(h, " ");
1.196 schwarze 677: else {
678: if (h->flags & HTML_PREKEEP)
679: h->flags |= HTML_KEEP;
1.197 schwarze 680: print_endword(h);
1.105 kristaps 681: }
1.196 schwarze 682: }
1.29 kristaps 683:
1.109 kristaps 684: if ( ! (h->flags & HTML_NONOSPACE))
685: h->flags &= ~HTML_NOSPACE;
1.110 kristaps 686: else
687: h->flags |= HTML_NOSPACE;
1.109 kristaps 688:
1.94 kristaps 689: /* Print out the tag name and attributes. */
690:
1.197 schwarze 691: print_byte(h, '<');
692: print_word(h, htmltags[tag].name);
1.194 schwarze 693:
694: va_start(ap, fmt);
695:
1.244 schwarze 696: while (*fmt != '\0' && *fmt != 's') {
1.203 schwarze 697:
1.238 schwarze 698: /* Parse attributes and arguments. */
1.203 schwarze 699:
700: arg1 = va_arg(ap, char *);
1.238 schwarze 701: arg2 = NULL;
1.194 schwarze 702: switch (*fmt++) {
703: case 'c':
1.195 schwarze 704: attr = "class";
1.194 schwarze 705: break;
706: case 'h':
1.195 schwarze 707: attr = "href";
1.194 schwarze 708: break;
709: case 'i':
1.195 schwarze 710: attr = "id";
1.276 ! schwarze 711: break;
! 712: case 'r':
! 713: attr = "role";
1.194 schwarze 714: break;
715: case '?':
1.203 schwarze 716: attr = arg1;
717: arg1 = va_arg(ap, char *);
1.194 schwarze 718: break;
719: default:
720: abort();
721: }
1.203 schwarze 722: if (*fmt == 'M')
723: arg2 = va_arg(ap, char *);
724: if (arg1 == NULL)
725: continue;
726:
1.238 schwarze 727: /* Print the attributes. */
1.203 schwarze 728:
1.197 schwarze 729: print_byte(h, ' ');
730: print_word(h, attr);
731: print_byte(h, '=');
732: print_byte(h, '"');
1.195 schwarze 733: switch (*fmt) {
1.208 schwarze 734: case 'I':
735: print_href(h, arg1, NULL, 0);
736: fmt++;
737: break;
1.195 schwarze 738: case 'M':
1.203 schwarze 739: print_href(h, arg1, arg2, 1);
1.195 schwarze 740: fmt++;
741: break;
1.208 schwarze 742: case 'R':
743: print_byte(h, '#');
744: print_encode(h, arg1, NULL, 1);
1.195 schwarze 745: fmt++;
1.208 schwarze 746: break;
1.195 schwarze 747: default:
1.244 schwarze 748: print_encode(h, arg1, NULL, 1);
1.195 schwarze 749: break;
750: }
1.197 schwarze 751: print_byte(h, '"');
1.194 schwarze 752: }
1.244 schwarze 753:
754: style_written = 0;
755: while (*fmt++ == 's') {
756: arg1 = va_arg(ap, char *);
757: arg2 = va_arg(ap, char *);
758: if (arg2 == NULL)
759: continue;
760: print_byte(h, ' ');
761: if (style_written == 0) {
762: print_word(h, "style=\"");
763: style_written = 1;
764: }
765: print_word(h, arg1);
766: print_byte(h, ':');
767: print_byte(h, ' ');
768: print_word(h, arg2);
769: print_byte(h, ';');
770: }
771: if (style_written)
772: print_byte(h, '"');
773:
1.194 schwarze 774: va_end(ap);
1.94 kristaps 775:
1.172 kristaps 776: /* Accommodate for "well-formed" singleton escaping. */
1.94 kristaps 777:
1.257 schwarze 778: if (htmltags[tag].flags & HTML_NOSTACK)
1.197 schwarze 779: print_byte(h, '/');
1.93 kristaps 780:
1.197 schwarze 781: print_byte(h, '>');
1.14 kristaps 782:
1.196 schwarze 783: if (tflags & HTML_NLBEGIN)
1.197 schwarze 784: print_endline(h);
1.196 schwarze 785: else
786: h->flags |= HTML_NOSPACE;
1.117 kristaps 787:
1.196 schwarze 788: if (tflags & HTML_INDENT)
789: h->indent++;
790: if (tflags & HTML_NOINDENT)
791: h->noindent++;
1.117 kristaps 792:
1.188 schwarze 793: return t;
1.264 schwarze 794: }
795:
796: /*
797: * Print an element with an optional "id=" attribute.
1.265 schwarze 798: * If the element has phrasing content and an "id=" attribute,
799: * also add a permalink: outside if it can be in phrasing context,
800: * inside otherwise.
1.264 schwarze 801: */
802: struct tag *
803: print_otag_id(struct html *h, enum htmltag elemtype, const char *cattr,
804: struct roff_node *n)
805: {
1.265 schwarze 806: struct roff_node *nch;
1.264 schwarze 807: struct tag *ret, *t;
1.266 schwarze 808: char *id, *href;
1.264 schwarze 809:
810: ret = NULL;
1.266 schwarze 811: id = href = NULL;
1.264 schwarze 812: if (n->flags & NODE_ID)
813: id = html_make_id(n, 1);
1.266 schwarze 814: if (n->flags & NODE_HREF)
1.269 schwarze 815: href = id == NULL ? html_make_id(n, 2) : id;
1.266 schwarze 816: if (href != NULL && htmltags[elemtype].flags & HTML_INPHRASE)
817: ret = print_otag(h, TAG_A, "chR", "permalink", href);
1.264 schwarze 818: t = print_otag(h, elemtype, "ci", cattr, id);
819: if (ret == NULL) {
820: ret = t;
1.266 schwarze 821: if (href != NULL && (nch = n->child) != NULL) {
1.265 schwarze 822: /* man(7) is safe, it tags phrasing content only. */
823: if (n->tok > MDOC_MAX ||
824: htmltags[elemtype].flags & HTML_TOPHRASE)
825: nch = NULL;
826: else /* For mdoc(7), beware of nested blocks. */
827: while (nch != NULL && nch->type == ROFFT_TEXT)
828: nch = nch->next;
829: if (nch == NULL)
1.266 schwarze 830: print_otag(h, TAG_A, "chR", "permalink", href);
1.265 schwarze 831: }
1.264 schwarze 832: }
1.269 schwarze 833: free(id);
1.266 schwarze 834: if (id == NULL)
835: free(href);
1.264 schwarze 836: return ret;
1.14 kristaps 837: }
838:
1.29 kristaps 839: static void
1.184 schwarze 840: print_ctag(struct html *h, struct tag *tag)
1.14 kristaps 841: {
1.196 schwarze 842: int tflags;
1.156 schwarze 843:
1.252 schwarze 844: if (tag->closed == 0) {
845: tag->closed = 1;
846: if (tag == h->metaf)
847: h->metaf = NULL;
848: if (tag == h->tblt)
849: h->tblt = NULL;
850:
851: tflags = htmltags[tag->tag].flags;
852: if (tflags & HTML_INDENT)
853: h->indent--;
854: if (tflags & HTML_NOINDENT)
855: h->noindent--;
856: if (tflags & HTML_NLEND)
857: print_endline(h);
858: print_indent(h);
859: print_byte(h, '<');
860: print_byte(h, '/');
861: print_word(h, htmltags[tag->tag].name);
862: print_byte(h, '>');
863: if (tflags & HTML_NLAFTER)
864: print_endline(h);
865: }
866: if (tag->refcnt == 0) {
867: h->tag = tag->next;
868: free(tag);
869: }
1.14 kristaps 870: }
871:
/*
 * Print the document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
878:
879: void
880: print_gen_comment(struct html *h, struct roff_node *n)
881: {
882: int wantblank;
883:
884: print_word(h, "<!-- This is an automatically generated file."
885: " Do not edit.");
886: h->indent = 1;
887: wantblank = 0;
888: while (n != NULL && n->type == ROFFT_COMMENT) {
889: if (strstr(n->string, "-->") == NULL &&
890: (wantblank || *n->string != '\0')) {
891: print_endline(h);
892: print_indent(h);
893: print_word(h, n->string);
894: wantblank = *n->string != '\0';
895: }
896: n = n->next;
897: }
898: if (wantblank)
899: print_endline(h);
900: print_word(h, " -->");
901: print_endline(h);
902: h->indent = 0;
1.1 kristaps 903: }
904:
/*
 * Print text that is not associated with any syntax tree node,
 * and consequently without a permalink.
 */
void
print_text(struct html *h, const char *word)
{
	struct roff_node	*none;

	none = NULL;
	print_tagged_text(h, word, none);
}
910:
911: void
912: print_tagged_text(struct html *h, const char *word, struct roff_node *n)
913: {
914: struct tag *t;
915: char *href;
916:
1.260 schwarze 917: /*
918: * Always wrap text in a paragraph unless already contained in
919: * some flow container; never put it directly into a section.
920: */
921:
922: if (h->tag->tag == TAG_SECTION)
923: print_otag(h, TAG_P, "c", "Pp");
924:
925: /* Output whitespace before this text? */
926:
1.197 schwarze 927: if (h->col && (h->flags & HTML_NOSPACE) == 0) {
1.105 kristaps 928: if ( ! (HTML_KEEP & h->flags)) {
929: if (HTML_PREKEEP & h->flags)
930: h->flags |= HTML_KEEP;
1.197 schwarze 931: print_endword(h);
1.105 kristaps 932: } else
1.216 schwarze 933: print_word(h, " ");
1.105 kristaps 934: }
1.260 schwarze 935:
936: /*
1.268 schwarze 937: * Optionally switch fonts, optionally write a permalink, then
938: * print the text, optionally surrounded by HTML whitespace.
1.260 schwarze 939: */
1.30 kristaps 940:
1.255 schwarze 941: assert(h->metaf == NULL);
942: print_metaf(h);
943: print_indent(h);
1.268 schwarze 944:
1.269 schwarze 945: if (n != NULL && (href = html_make_id(n, 2)) != NULL) {
1.268 schwarze 946: t = print_otag(h, TAG_A, "chR", "permalink", href);
947: free(href);
948: } else
949: t = NULL;
950:
1.195 schwarze 951: if ( ! print_encode(h, word, NULL, 0)) {
1.109 kristaps 952: if ( ! (h->flags & HTML_NONOSPACE))
953: h->flags &= ~HTML_NOSPACE;
1.183 schwarze 954: h->flags &= ~HTML_NONEWLINE;
1.149 kristaps 955: } else
1.183 schwarze 956: h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
1.122 kristaps 957:
1.255 schwarze 958: if (h->metaf != NULL) {
1.122 kristaps 959: print_tagq(h, h->metaf);
960: h->metaf = NULL;
1.268 schwarze 961: } else if (t != NULL)
962: print_tagq(h, t);
1.113 schwarze 963:
964: h->flags &= ~HTML_IGNDELIM;
1.1 kristaps 965: }
1.30 kristaps 966:
1.51 kristaps 967: void
1.30 kristaps 968: print_tagq(struct html *h, const struct tag *until)
969: {
1.252 schwarze 970: struct tag *this, *next;
1.30 kristaps 971:
1.252 schwarze 972: for (this = h->tag; this != NULL; this = next) {
973: next = this == until ? NULL : this->next;
974: print_ctag(h, this);
1.30 kristaps 975: }
976: }
977:
1.250 schwarze 978: /*
979: * Close out all open elements up to but excluding suntil.
980: * Note that a paragraph just inside stays open together with it
981: * because paragraphs include subsequent phrasing content.
982: */
1.51 kristaps 983: void
1.30 kristaps 984: print_stagq(struct html *h, const struct tag *suntil)
985: {
1.252 schwarze 986: struct tag *this, *next;
1.30 kristaps 987:
1.252 schwarze 988: for (this = h->tag; this != NULL; this = next) {
989: next = this->next;
990: if (this == suntil || (next == suntil &&
991: (this->tag == TAG_P || this->tag == TAG_PRE)))
992: break;
993: print_ctag(h, this);
1.30 kristaps 994: }
1.171 kristaps 995: }
996:
1.197 schwarze 997:
998: /***********************************************************************
999: * Low level output functions.
1000: * They implement line breaking using a short static buffer.
1001: ***********************************************************************/
1002:
1003: /*
1004: * Buffer one HTML output byte.
1005: * If the buffer is full, flush and deactivate it and start a new line.
1006: * If the buffer is inactive, print directly.
1007: */
1008: static void
1009: print_byte(struct html *h, char c)
1010: {
1011: if ((h->flags & HTML_BUFFER) == 0) {
1012: putchar(c);
1013: h->col++;
1014: return;
1015: }
1016:
1017: if (h->col + h->bufcol < sizeof(h->buf)) {
1018: h->buf[h->bufcol++] = c;
1019: return;
1020: }
1021:
1022: putchar('\n');
1023: h->col = 0;
1024: print_indent(h);
1025: putchar(' ');
1026: putchar(' ');
1027: fwrite(h->buf, h->bufcol, 1, stdout);
1028: putchar(c);
1029: h->col = (h->indent + 1) * 2 + h->bufcol + 1;
1030: h->bufcol = 0;
1031: h->flags &= ~HTML_BUFFER;
1032: }
1033:
1.196 schwarze 1034: /*
1035: * If something was printed on the current output line, end it.
1.197 schwarze 1036: * Not to be called right after print_indent().
1.196 schwarze 1037: */
1.202 schwarze 1038: void
1.197 schwarze 1039: print_endline(struct html *h)
1.196 schwarze 1040: {
1.197 schwarze 1041: if (h->col == 0)
1.196 schwarze 1042: return;
1043:
1.197 schwarze 1044: if (h->bufcol) {
1045: putchar(' ');
1046: fwrite(h->buf, h->bufcol, 1, stdout);
1047: h->bufcol = 0;
1048: }
1.196 schwarze 1049: putchar('\n');
1.197 schwarze 1050: h->col = 0;
1051: h->flags |= HTML_NOSPACE;
1052: h->flags &= ~HTML_BUFFER;
1053: }
1054:
1055: /*
1056: * Flush the HTML output buffer.
1057: * If it is inactive, activate it.
1058: */
1059: static void
1060: print_endword(struct html *h)
1061: {
1062: if (h->noindent) {
1063: print_byte(h, ' ');
1064: return;
1065: }
1066:
1067: if ((h->flags & HTML_BUFFER) == 0) {
1068: h->col++;
1069: h->flags |= HTML_BUFFER;
1070: } else if (h->bufcol) {
1071: putchar(' ');
1072: fwrite(h->buf, h->bufcol, 1, stdout);
1073: h->col += h->bufcol + 1;
1074: }
1075: h->bufcol = 0;
1.196 schwarze 1076: }
1077:
1078: /*
1079: * If at the beginning of a new output line,
1080: * perform indentation and mark the line as containing output.
1081: * Make sure to really produce some output right afterwards,
1082: * but do not use print_otag() for producing it.
1083: */
1084: static void
1.197 schwarze 1085: print_indent(struct html *h)
1.196 schwarze 1086: {
1.197 schwarze 1087: size_t i;
1.196 schwarze 1088:
1.261 schwarze 1089: if (h->col || h->noindent)
1.196 schwarze 1090: return;
1091:
1.261 schwarze 1092: h->col = h->indent * 2;
1093: for (i = 0; i < h->col; i++)
1094: putchar(' ');
1.197 schwarze 1095: }
1096:
/*
 * Pass a NUL-terminated string to print_byte() one byte at a time,
 * such that it is printed or buffered depending on the current
 * state of the HTML output buffer.
 */
static void
print_word(struct html *h, const char *cp)
{
	for (; *cp != '\0'; cp++)
		print_byte(h, *cp);
}
CVSweb