Annotation of mandoc/html.c, Revision 1.272
1.272 ! schwarze 1: /* $Id: html.c,v 1.271 2020/10/16 17:22:43 schwarze Exp $ */
1.1 kristaps 2: /*
1.264 schwarze 3: * Copyright (c) 2011-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
1.176 schwarze 4: * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
1.1 kristaps 5: *
6: * Permission to use, copy, modify, and distribute this software for any
1.29 kristaps 7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
1.1 kristaps 9: *
1.186 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.29 kristaps 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.186 schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.29 kristaps 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.264 schwarze 17: *
18: * Common functions for mandoc(1) HTML formatters.
19: * For use by individual formatters and by the main program.
1.1 kristaps 20: */
1.92 kristaps 21: #include "config.h"
22:
1.41 kristaps 23: #include <sys/types.h>
1.240 schwarze 24: #include <sys/stat.h>
1.30 kristaps 25:
1.1 kristaps 26: #include <assert.h>
1.68 kristaps 27: #include <ctype.h>
1.76 kristaps 28: #include <stdarg.h>
1.229 schwarze 29: #include <stddef.h>
1.29 kristaps 30: #include <stdio.h>
1.63 kristaps 31: #include <stdint.h>
1.1 kristaps 32: #include <stdlib.h>
1.33 kristaps 33: #include <string.h>
1.45 kristaps 34: #include <unistd.h>
1.1 kristaps 35:
1.210 schwarze 36: #include "mandoc_aux.h"
1.229 schwarze 37: #include "mandoc_ohash.h"
1.100 kristaps 38: #include "mandoc.h"
1.210 schwarze 39: #include "roff.h"
1.58 kristaps 40: #include "out.h"
1.51 kristaps 41: #include "html.h"
1.186 schwarze 42: #include "manconf.h"
1.64 kristaps 43: #include "main.h"
1.63 kristaps 44:
/*
 * Static description of one HTML element type: the element name
 * and a bit mask of properties controlling nesting and formatting.
 */
struct	htmldata {
	const char	 *name;	/* Element name as printed inside the tag. */
	int		  flags;	/* Bitwise OR of the HTML_* values below. */
#define	HTML_INPHRASE	 (1 << 0)  /* Can appear in phrasing context. */
#define	HTML_TOPHRASE	 (1 << 1)  /* Establishes phrasing context. */
#define	HTML_NOSTACK	 (1 << 2)  /* Does not have an end tag. */
#define	HTML_NLBEFORE	 (1 << 3)  /* Output line break before opening. */
#define	HTML_NLBEGIN	 (1 << 4)  /* Output line break after opening. */
#define	HTML_NLEND	 (1 << 5)  /* Output line break before closing. */
#define	HTML_NLAFTER	 (1 << 6)  /* Output line break after closing. */
/* Convenience combinations of the four line break flags. */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)
#define	HTML_INDENT	 (1 << 7)  /* Indent content by two spaces. */
#define	HTML_NOINDENT	 (1 << 8)  /* Exception: never indent content. */
};
1.7 kristaps 61:
/*
 * Properties of all supported element types.
 * The table is indexed with enum htmltag values (see the
 * htmltags[tag] lookups in this file), so the entries have to stay
 * in the same order as the constants of that enum, which is
 * declared outside this file.
 */
static	const struct htmldata htmltags[TAG_MAX] = {
	{"html",	HTML_NLALL},
	{"head",	HTML_NLALL | HTML_INDENT},
	{"meta",	HTML_NOSTACK | HTML_NLALL},
	{"link",	HTML_NOSTACK | HTML_NLALL},
	{"style",	HTML_NLALL | HTML_INDENT},
	{"title",	HTML_NLAROUND},
	{"body",	HTML_NLALL},
	{"div",		HTML_NLAROUND},
	{"section",	HTML_NLALL},
	{"table",	HTML_NLALL | HTML_INDENT},
	{"tr",		HTML_NLALL | HTML_INDENT},
	{"td",		HTML_NLAROUND},
	{"li",		HTML_NLAROUND | HTML_INDENT},
	{"ul",		HTML_NLALL | HTML_INDENT},
	{"ol",		HTML_NLALL | HTML_INDENT},
	{"dl",		HTML_NLALL | HTML_INDENT},
	{"dt",		HTML_NLAROUND},
	{"dd",		HTML_NLAROUND | HTML_INDENT},
	{"h1",		HTML_TOPHRASE | HTML_NLAROUND},
	{"h2",		HTML_TOPHRASE | HTML_NLAROUND},
	{"p",		HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT},
	{"pre",		HTML_TOPHRASE | HTML_NLAROUND | HTML_NOINDENT},
	{"a",		HTML_INPHRASE | HTML_TOPHRASE},
	{"b",		HTML_INPHRASE | HTML_TOPHRASE},
	{"cite",	HTML_INPHRASE | HTML_TOPHRASE},
	{"code",	HTML_INPHRASE | HTML_TOPHRASE},
	{"i",		HTML_INPHRASE | HTML_TOPHRASE},
	{"small",	HTML_INPHRASE | HTML_TOPHRASE},
	{"span",	HTML_INPHRASE | HTML_TOPHRASE},
	{"var",		HTML_INPHRASE | HTML_TOPHRASE},
	{"br",		HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL},
	{"mark",	HTML_INPHRASE },
	{"math",	HTML_INPHRASE | HTML_NLALL | HTML_INDENT},
	/* The remaining elements carry no formatting flags. */
	{"mrow",	0},
	{"mi",		0},
	{"mn",		0},
	{"mo",		0},
	{"msup",	0},
	{"msub",	0},
	{"msubsup",	0},
	{"mfrac",	0},
	{"msqrt",	0},
	{"mfenced",	0},
	{"mtable",	0},
	{"mtr",		0},
	{"mtd",		0},
	{"munderover",	0},
	{"munder",	0},
	{"mover",	0},
};
113:
/* Avoid duplicate HTML id= attributes. */

/*
 * One entry per distinct id= string handed out so far.
 * Entries are allocated with room for the string behind the struct.
 */
struct	id_entry {
	int	 ord;	/* Ordinal number of the latest occurrence. */
	char	 id[];	/* The id= attribute without any ordinal suffix. */
};
/*
 * Table of struct id_entry objects, keyed by the id member.
 * It is a single file-scope object, initialized in html_alloc()
 * and html_reset() and emptied in html_reset_internal().
 */
static	struct ohash	 id_unique;
121:
/* Release the element stack and the table of ids. */
static	void	 html_reset_internal(struct html *);
/* Low level output functions, defined at the end of this file. */
static	void	 print_byte(struct html *, char);
static	void	 print_endword(struct html *);
static	void	 print_indent(struct html *);
static	void	 print_word(struct html *, const char *);

/* Helpers for printing tags, attribute values, and text. */
static	void	 print_ctag(struct html *, struct tag *);
static	int	 print_escape(struct html *, char);
static	int	 print_encode(struct html *, const char *, const char *, int);
static	void	 print_href(struct html *, const char *, const char *, int);
static	void	 print_metaf(struct html *);
1.82 kristaps 133:
1.156 schwarze 134:
1.180 schwarze 135: void *
1.191 schwarze 136: html_alloc(const struct manoutput *outopts)
1.10 kristaps 137: {
1.30 kristaps 138: struct html *h;
139:
1.128 kristaps 140: h = mandoc_calloc(1, sizeof(struct html));
1.10 kristaps 141:
1.204 schwarze 142: h->tag = NULL;
1.272 ! schwarze 143: h->metac = h->metal = ESCAPE_FONTROMAN;
1.186 schwarze 144: h->style = outopts->style;
1.240 schwarze 145: if ((h->base_man1 = outopts->man) == NULL)
146: h->base_man2 = NULL;
147: else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
148: *h->base_man2++ = '\0';
1.186 schwarze 149: h->base_includes = outopts->includes;
150: if (outopts->fragment)
151: h->oflags |= HTML_FRAGMENT;
1.241 schwarze 152: if (outopts->toc)
153: h->oflags |= HTML_TOC;
1.43 kristaps 154:
1.269 schwarze 155: mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
1.229 schwarze 156:
1.188 schwarze 157: return h;
1.29 kristaps 158: }
1.10 kristaps 159:
1.254 schwarze 160: static void
161: html_reset_internal(struct html *h)
1.29 kristaps 162: {
1.30 kristaps 163: struct tag *tag;
1.269 schwarze 164: struct id_entry *entry;
1.229 schwarze 165: unsigned int slot;
1.30 kristaps 166:
1.204 schwarze 167: while ((tag = h->tag) != NULL) {
168: h->tag = tag->next;
1.30 kristaps 169: free(tag);
170: }
1.269 schwarze 171: entry = ohash_first(&id_unique, &slot);
172: while (entry != NULL) {
173: free(entry);
174: entry = ohash_next(&id_unique, &slot);
1.229 schwarze 175: }
176: ohash_delete(&id_unique);
1.254 schwarze 177: }
178:
179: void
180: html_reset(void *p)
181: {
182: html_reset_internal(p);
1.269 schwarze 183: mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
1.254 schwarze 184: }
185:
/*
 * Discard per-document state, then the formatter object itself.
 */
void
html_free(void *p)
{
	struct html	*h = p;

	html_reset_internal(h);
	free(h);
}
1.2 kristaps 192:
1.51 kristaps 193: void
1.29 kristaps 194: print_gen_head(struct html *h)
195: {
1.165 kristaps 196: struct tag *t;
1.41 kristaps 197:
1.194 schwarze 198: print_otag(h, TAG_META, "?", "charset", "utf-8");
1.222 schwarze 199: if (h->style != NULL) {
200: print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
201: h->style, "type", "text/css", "media", "all");
202: return;
203: }
1.165 kristaps 204:
1.168 kristaps 205: /*
1.222 schwarze 206: * Print a minimal embedded style sheet.
1.168 kristaps 207: */
1.196 schwarze 208:
1.194 schwarze 209: t = print_otag(h, TAG_STYLE, "");
1.196 schwarze 210: print_text(h, "table.head, table.foot { width: 100%; }");
1.197 schwarze 211: print_endline(h);
1.196 schwarze 212: print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
1.197 schwarze 213: print_endline(h);
1.196 schwarze 214: print_text(h, "td.head-vol { text-align: center; }");
1.197 schwarze 215: print_endline(h);
1.256 schwarze 216: print_text(h, ".Nd, .Bf, .Op { display: inline; }");
1.225 schwarze 217: print_endline(h);
1.256 schwarze 218: print_text(h, ".Pa, .Ad { font-style: italic; }");
1.226 schwarze 219: print_endline(h);
1.256 schwarze 220: print_text(h, ".Ms { font-weight: bold; }");
1.228 schwarze 221: print_endline(h);
1.256 schwarze 222: print_text(h, ".Bl-diag ");
1.224 schwarze 223: print_byte(h, '>');
224: print_text(h, " dt { font-weight: bold; }");
1.223 schwarze 225: print_endline(h);
1.256 schwarze 226: print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd "
227: "{ font-weight: bold; font-family: inherit; }");
1.165 kristaps 228: print_tagq(h, t);
1.4 kristaps 229: }
230:
1.255 schwarze 231: int
232: html_setfont(struct html *h, enum mandoc_esc font)
1.88 kristaps 233: {
1.255 schwarze 234: switch (font) {
1.156 schwarze 235: case ESCAPE_FONTPREV:
1.90 kristaps 236: font = h->metal;
1.88 kristaps 237: break;
1.156 schwarze 238: case ESCAPE_FONTITALIC:
239: case ESCAPE_FONTBOLD:
240: case ESCAPE_FONTBI:
1.242 schwarze 241: case ESCAPE_FONTCW:
1.255 schwarze 242: case ESCAPE_FONTROMAN:
1.242 schwarze 243: break;
1.156 schwarze 244: case ESCAPE_FONT:
1.255 schwarze 245: font = ESCAPE_FONTROMAN;
1.88 kristaps 246: break;
247: default:
1.255 schwarze 248: return 0;
1.88 kristaps 249: }
1.255 schwarze 250: h->metal = h->metac;
251: h->metac = font;
252: return 1;
253: }
1.88 kristaps 254:
1.255 schwarze 255: static void
256: print_metaf(struct html *h)
257: {
1.122 kristaps 258: if (h->metaf) {
259: print_tagq(h, h->metaf);
260: h->metaf = NULL;
261: }
1.255 schwarze 262: switch (h->metac) {
263: case ESCAPE_FONTITALIC:
1.194 schwarze 264: h->metaf = print_otag(h, TAG_I, "");
1.152 schwarze 265: break;
1.255 schwarze 266: case ESCAPE_FONTBOLD:
1.194 schwarze 267: h->metaf = print_otag(h, TAG_B, "");
1.152 schwarze 268: break;
1.255 schwarze 269: case ESCAPE_FONTBI:
1.194 schwarze 270: h->metaf = print_otag(h, TAG_B, "");
271: print_otag(h, TAG_I, "");
1.152 schwarze 272: break;
1.255 schwarze 273: case ESCAPE_FONTCW:
1.242 schwarze 274: h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
275: break;
1.152 schwarze 276: default:
277: break;
278: }
1.248 schwarze 279: }
280:
1.249 schwarze 281: void
282: html_close_paragraph(struct html *h)
283: {
1.259 schwarze 284: struct tag *this, *next;
285: int flags;
1.249 schwarze 286:
1.259 schwarze 287: this = h->tag;
288: for (;;) {
289: next = this->next;
290: flags = htmltags[this->tag].flags;
291: if (flags & (HTML_INPHRASE | HTML_TOPHRASE))
292: print_ctag(h, this);
293: if ((flags & HTML_INPHRASE) == 0)
1.249 schwarze 294: break;
1.259 schwarze 295: this = next;
1.249 schwarze 296: }
297: }
298:
1.248 schwarze 299: /*
300: * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
301: * TOKEN_NONE does not switch. The old mode is returned.
302: */
303: enum roff_tok
304: html_fillmode(struct html *h, enum roff_tok want)
305: {
306: struct tag *t;
307: enum roff_tok had;
308:
309: for (t = h->tag; t != NULL; t = t->next)
310: if (t->tag == TAG_PRE)
311: break;
312:
313: had = t == NULL ? ROFF_fi : ROFF_nf;
314:
315: if (want != had) {
316: switch (want) {
317: case ROFF_fi:
318: print_tagq(h, t);
319: break;
320: case ROFF_nf:
1.249 schwarze 321: html_close_paragraph(h);
1.248 schwarze 322: print_otag(h, TAG_PRE, "");
323: break;
324: case TOKEN_NONE:
325: break;
326: default:
327: abort();
328: }
329: }
330: return had;
1.210 schwarze 331: }
332:
1.264 schwarze 333: /*
334: * Allocate a string to be used for the "id=" attribute of an HTML
335: * element and/or as a segment identifier for a URI in an <a> element.
336: * The function may fail and return NULL if the node lacks text data
337: * to create the attribute from.
1.269 schwarze 338: * The caller is responsible for free(3)ing the returned string.
339: *
1.264 schwarze 340: * If the "unique" argument is non-zero, the "id_unique" ohash table
1.269 schwarze 341: * is used for de-duplication. If the "unique" argument is 1,
342: * it is the first time the function is called for this tag and
343: * location, so if an ordinal suffix is needed, it is incremented.
344: * If the "unique" argument is 2, it is the second time the function
345: * is called for this tag and location, so the ordinal suffix
346: * remains unchanged.
1.264 schwarze 347: */
1.210 schwarze 348: char *
1.229 schwarze 349: html_make_id(const struct roff_node *n, int unique)
1.210 schwarze 350: {
351: const struct roff_node *nch;
1.269 schwarze 352: struct id_entry *entry;
353: char *buf, *cp;
354: size_t len;
1.229 schwarze 355: unsigned int slot;
1.210 schwarze 356:
1.267 schwarze 357: if (n->tag != NULL)
358: buf = mandoc_strdup(n->tag);
1.264 schwarze 359: else {
360: switch (n->tok) {
361: case MDOC_Sh:
362: case MDOC_Ss:
363: case MDOC_Sx:
364: case MAN_SH:
365: case MAN_SS:
366: for (nch = n->child; nch != NULL; nch = nch->next)
367: if (nch->type != ROFFT_TEXT)
368: return NULL;
369: buf = NULL;
370: deroff(&buf, n);
371: if (buf == NULL)
372: return NULL;
373: break;
374: default:
1.265 schwarze 375: if (n->child == NULL || n->child->type != ROFFT_TEXT)
1.264 schwarze 376: return NULL;
377: buf = mandoc_strdup(n->child->string);
378: break;
379: }
380: }
1.210 schwarze 381:
1.230 schwarze 382: /*
383: * In ID attributes, only use ASCII characters that are
384: * permitted in URL-fragment strings according to the
385: * explicit list at:
386: * https://url.spec.whatwg.org/#url-fragment-string
1.270 schwarze 387: * In addition, reserve '~' for ordinal suffixes.
1.230 schwarze 388: */
1.210 schwarze 389:
390: for (cp = buf; *cp != '\0'; cp++)
1.230 schwarze 391: if (isalnum((unsigned char)*cp) == 0 &&
1.270 schwarze 392: strchr("!$&'()*+,-./:;=?@_", *cp) == NULL)
1.210 schwarze 393: *cp = '_';
394:
1.229 schwarze 395: if (unique == 0)
396: return buf;
397:
398: /* Avoid duplicate HTML id= attributes. */
399:
400: slot = ohash_qlookup(&id_unique, buf);
1.269 schwarze 401: if ((entry = ohash_find(&id_unique, slot)) == NULL) {
402: len = strlen(buf) + 1;
403: entry = mandoc_malloc(sizeof(*entry) + len);
404: entry->ord = 1;
405: memcpy(entry->id, buf, len);
406: ohash_insert(&id_unique, slot, entry);
407: } else if (unique == 1)
408: entry->ord++;
409:
410: if (entry->ord > 1) {
411: cp = buf;
1.270 schwarze 412: mandoc_asprintf(&buf, "%s~%d", cp, entry->ord);
1.269 schwarze 413: free(cp);
1.229 schwarze 414: }
1.210 schwarze 415: return buf;
1.88 kristaps 416: }
417:
1.85 kristaps 418: static int
1.197 schwarze 419: print_escape(struct html *h, char c)
1.159 schwarze 420: {
421:
422: switch (c) {
423: case '<':
1.197 schwarze 424: print_word(h, "<");
1.159 schwarze 425: break;
426: case '>':
1.197 schwarze 427: print_word(h, ">");
1.159 schwarze 428: break;
429: case '&':
1.197 schwarze 430: print_word(h, "&");
1.159 schwarze 431: break;
432: case '"':
1.197 schwarze 433: print_word(h, """);
1.159 schwarze 434: break;
435: case ASCII_NBRSP:
1.197 schwarze 436: print_word(h, " ");
1.159 schwarze 437: break;
438: case ASCII_HYPH:
1.197 schwarze 439: print_byte(h, '-');
1.189 schwarze 440: break;
1.159 schwarze 441: case ASCII_BREAK:
442: break;
443: default:
1.188 schwarze 444: return 0;
1.159 schwarze 445: }
1.188 schwarze 446: return 1;
1.159 schwarze 447: }
448:
/*
 * Print the bytes from p up to but excluding pend, or up to the
 * terminating NUL byte if pend is NULL, replacing characters that
 * are special in HTML and interpreting escape sequences as
 * classified by mandoc_escape().
 * If norecurse is non-zero, font escapes are consumed but neither
 * change the font state nor print any font elements.
 * Return 1 if an ESCAPE_NOSPACE sequence was directly followed
 * by a NUL byte, or 0 otherwise.
 */
static int
print_encode(struct html *h, const char *p, const char *pend, int norecurse)
{
	char		 numbuf[16];
	const char	*seq;
	size_t		 sz;
	int		 c, len, breakline, nospace;
	enum mandoc_esc	 esc;
	/* Bytes that cannot be copied to the output verbatim. */
	static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
		ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };

	if (pend == NULL)
		pend = strchr(p, '\0');

	breakline = 0;
	nospace = 0;

	while (p < pend) {
		/* A pending skip request swallows one ordinary byte. */
		if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
			h->flags &= ~HTML_SKIPCHAR;
			p++;
			continue;
		}

		/* Copy the run of bytes needing no special handling. */
		for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
			print_byte(h, *p);

		/*
		 * After ESCAPE_BREAK, the line break is printed at the
		 * next blank or at the end of the input, and the
		 * blanks at that position are skipped.
		 */
		if (breakline &&
		    (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
			print_otag(h, TAG_BR, "");
			breakline = 0;
			while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
				p++;
			continue;
		}

		if (p >= pend)
			break;

		if (*p == ' ') {
			print_endword(h);
			p++;
			continue;
		}

		if (print_escape(h, *p++))
			continue;

		/*
		 * Of the bytes in rejs, print_escape() handles all
		 * but the blank and the backslash, and the blank
		 * was handled above; p now points past the byte.
		 */
		esc = mandoc_escape(&p, &seq, &len);
		switch (esc) {
		case ESCAPE_FONT:
		case ESCAPE_FONTPREV:
		case ESCAPE_FONTBOLD:
		case ESCAPE_FONTITALIC:
		case ESCAPE_FONTBI:
		case ESCAPE_FONTCW:
		case ESCAPE_FONTROMAN:
			if (0 == norecurse) {
				/* No whitespace around the font tags. */
				h->flags |= HTML_NOSPACE;
				if (html_setfont(h, esc))
					print_metaf(h);
				h->flags &= ~HTML_NOSPACE;
			}
			continue;
		case ESCAPE_SKIPCHAR:
			h->flags |= HTML_SKIPCHAR;
			continue;
		case ESCAPE_ERROR:
			continue;
		default:
			break;
		}

		/* A pending skip request swallows one escape sequence. */
		if (h->flags & HTML_SKIPCHAR) {
			h->flags &= ~HTML_SKIPCHAR;
			continue;
		}

		/* Translate the escape sequence to a code point in c. */
		switch (esc) {
		case ESCAPE_UNICODE:
			/* Skip past "u" header. */
			c = mchars_num2uc(seq + 1, len - 1);
			break;
		case ESCAPE_NUMBERED:
			c = mchars_num2char(seq, len);
			if (c < 0)
				continue;
			break;
		case ESCAPE_SPECIAL:
			c = mchars_spec2cp(seq, len);
			if (c <= 0)
				continue;
			break;
		case ESCAPE_UNDEF:
			c = *seq;
			break;
		case ESCAPE_DEVICE:
			print_word(h, "html");
			continue;
		case ESCAPE_BREAK:
			breakline = 1;
			continue;
		case ESCAPE_NOSPACE:
			if ('\0' == *p)
				nospace = 1;
			continue;
		case ESCAPE_OVERSTRIKE:
			if (len == 0)
				continue;
			/* Only the last character is printed. */
			c = seq[len - 1];
			break;
		default:
			continue;
		}

		/*
		 * Values below 0x20 other than the tab, and values from
		 * 0x7F to 0x9F, are replaced by U+FFFD.  Values above
		 * 0x7E are printed as hexadecimal numeric character
		 * references, the rest like ordinary input bytes.
		 */
		if ((c < 0x20 && c != 0x09) ||
		    (c > 0x7E && c < 0xA0))
			c = 0xFFFD;
		if (c > 0x7E) {
			(void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
			print_word(h, numbuf);
		} else if (print_escape(h, c) == 0)
			print_byte(h, c);
	}

	return nospace;
}
575:
1.94 kristaps 576: static void
1.195 schwarze 577: print_href(struct html *h, const char *name, const char *sec, int man)
1.94 kristaps 578: {
1.240 schwarze 579: struct stat sb;
1.195 schwarze 580: const char *p, *pp;
1.240 schwarze 581: char *filename;
582:
583: if (man) {
584: pp = h->base_man1;
585: if (h->base_man2 != NULL) {
586: mandoc_asprintf(&filename, "%s.%s", name, sec);
587: if (stat(filename, &sb) == -1)
588: pp = h->base_man2;
589: free(filename);
590: }
591: } else
592: pp = h->base_includes;
1.195 schwarze 593:
594: while ((p = strchr(pp, '%')) != NULL) {
595: print_encode(h, pp, p, 1);
596: if (man && p[1] == 'S') {
597: if (sec == NULL)
1.197 schwarze 598: print_byte(h, '1');
1.195 schwarze 599: else
600: print_encode(h, sec, NULL, 1);
601: } else if ((man && p[1] == 'N') ||
602: (man == 0 && p[1] == 'I'))
603: print_encode(h, name, NULL, 1);
604: else
605: print_encode(h, p, p + 2, 1);
606: pp = p + 2;
607: }
608: if (*pp != '\0')
609: print_encode(h, pp, NULL, 1);
1.94 kristaps 610: }
611:
1.51 kristaps 612: struct tag *
1.194 schwarze 613: print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
1.14 kristaps 614: {
1.194 schwarze 615: va_list ap;
1.30 kristaps 616: struct tag *t;
1.195 schwarze 617: const char *attr;
1.203 schwarze 618: char *arg1, *arg2;
1.244 schwarze 619: int style_written, tflags;
1.196 schwarze 620:
621: tflags = htmltags[tag].flags;
1.30 kristaps 622:
1.257 schwarze 623: /* Flow content is not allowed in phrasing context. */
624:
625: if ((tflags & HTML_INPHRASE) == 0) {
626: for (t = h->tag; t != NULL; t = t->next) {
627: if (t->closed)
628: continue;
629: assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0);
630: break;
631: }
1.260 schwarze 632:
633: /*
634: * Always wrap phrasing elements in a paragraph
635: * unless already contained in some flow container;
636: * never put them directly into a section.
637: */
638:
639: } else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION)
640: print_otag(h, TAG_P, "c", "Pp");
1.257 schwarze 641:
1.204 schwarze 642: /* Push this tag onto the stack of open scopes. */
1.94 kristaps 643:
1.196 schwarze 644: if ((tflags & HTML_NOSTACK) == 0) {
1.128 kristaps 645: t = mandoc_malloc(sizeof(struct tag));
1.30 kristaps 646: t->tag = tag;
1.204 schwarze 647: t->next = h->tag;
1.252 schwarze 648: t->refcnt = 0;
649: t->closed = 0;
1.204 schwarze 650: h->tag = t;
1.30 kristaps 651: } else
652: t = NULL;
1.29 kristaps 653:
1.196 schwarze 654: if (tflags & HTML_NLBEFORE)
1.197 schwarze 655: print_endline(h);
656: if (h->col == 0)
657: print_indent(h);
1.196 schwarze 658: else if ((h->flags & HTML_NOSPACE) == 0) {
659: if (h->flags & HTML_KEEP)
1.216 schwarze 660: print_word(h, " ");
1.196 schwarze 661: else {
662: if (h->flags & HTML_PREKEEP)
663: h->flags |= HTML_KEEP;
1.197 schwarze 664: print_endword(h);
1.105 kristaps 665: }
1.196 schwarze 666: }
1.29 kristaps 667:
1.109 kristaps 668: if ( ! (h->flags & HTML_NONOSPACE))
669: h->flags &= ~HTML_NOSPACE;
1.110 kristaps 670: else
671: h->flags |= HTML_NOSPACE;
1.109 kristaps 672:
1.94 kristaps 673: /* Print out the tag name and attributes. */
674:
1.197 schwarze 675: print_byte(h, '<');
676: print_word(h, htmltags[tag].name);
1.194 schwarze 677:
678: va_start(ap, fmt);
679:
1.244 schwarze 680: while (*fmt != '\0' && *fmt != 's') {
1.203 schwarze 681:
1.238 schwarze 682: /* Parse attributes and arguments. */
1.203 schwarze 683:
684: arg1 = va_arg(ap, char *);
1.238 schwarze 685: arg2 = NULL;
1.194 schwarze 686: switch (*fmt++) {
687: case 'c':
1.195 schwarze 688: attr = "class";
1.194 schwarze 689: break;
690: case 'h':
1.195 schwarze 691: attr = "href";
1.194 schwarze 692: break;
693: case 'i':
1.195 schwarze 694: attr = "id";
1.194 schwarze 695: break;
696: case '?':
1.203 schwarze 697: attr = arg1;
698: arg1 = va_arg(ap, char *);
1.194 schwarze 699: break;
700: default:
701: abort();
702: }
1.203 schwarze 703: if (*fmt == 'M')
704: arg2 = va_arg(ap, char *);
705: if (arg1 == NULL)
706: continue;
707:
1.238 schwarze 708: /* Print the attributes. */
1.203 schwarze 709:
1.197 schwarze 710: print_byte(h, ' ');
711: print_word(h, attr);
712: print_byte(h, '=');
713: print_byte(h, '"');
1.195 schwarze 714: switch (*fmt) {
1.208 schwarze 715: case 'I':
716: print_href(h, arg1, NULL, 0);
717: fmt++;
718: break;
1.195 schwarze 719: case 'M':
1.203 schwarze 720: print_href(h, arg1, arg2, 1);
1.195 schwarze 721: fmt++;
722: break;
1.208 schwarze 723: case 'R':
724: print_byte(h, '#');
725: print_encode(h, arg1, NULL, 1);
1.195 schwarze 726: fmt++;
1.208 schwarze 727: break;
1.195 schwarze 728: default:
1.244 schwarze 729: print_encode(h, arg1, NULL, 1);
1.195 schwarze 730: break;
731: }
1.197 schwarze 732: print_byte(h, '"');
1.194 schwarze 733: }
1.244 schwarze 734:
735: style_written = 0;
736: while (*fmt++ == 's') {
737: arg1 = va_arg(ap, char *);
738: arg2 = va_arg(ap, char *);
739: if (arg2 == NULL)
740: continue;
741: print_byte(h, ' ');
742: if (style_written == 0) {
743: print_word(h, "style=\"");
744: style_written = 1;
745: }
746: print_word(h, arg1);
747: print_byte(h, ':');
748: print_byte(h, ' ');
749: print_word(h, arg2);
750: print_byte(h, ';');
751: }
752: if (style_written)
753: print_byte(h, '"');
754:
1.194 schwarze 755: va_end(ap);
1.94 kristaps 756:
1.172 kristaps 757: /* Accommodate for "well-formed" singleton escaping. */
1.94 kristaps 758:
1.257 schwarze 759: if (htmltags[tag].flags & HTML_NOSTACK)
1.197 schwarze 760: print_byte(h, '/');
1.93 kristaps 761:
1.197 schwarze 762: print_byte(h, '>');
1.14 kristaps 763:
1.196 schwarze 764: if (tflags & HTML_NLBEGIN)
1.197 schwarze 765: print_endline(h);
1.196 schwarze 766: else
767: h->flags |= HTML_NOSPACE;
1.117 kristaps 768:
1.196 schwarze 769: if (tflags & HTML_INDENT)
770: h->indent++;
771: if (tflags & HTML_NOINDENT)
772: h->noindent++;
1.117 kristaps 773:
1.188 schwarze 774: return t;
1.264 schwarze 775: }
776:
777: /*
778: * Print an element with an optional "id=" attribute.
1.265 schwarze 779: * If the element has phrasing content and an "id=" attribute,
780: * also add a permalink: outside if it can be in phrasing context,
781: * inside otherwise.
1.264 schwarze 782: */
783: struct tag *
784: print_otag_id(struct html *h, enum htmltag elemtype, const char *cattr,
785: struct roff_node *n)
786: {
1.265 schwarze 787: struct roff_node *nch;
1.264 schwarze 788: struct tag *ret, *t;
1.266 schwarze 789: char *id, *href;
1.264 schwarze 790:
791: ret = NULL;
1.266 schwarze 792: id = href = NULL;
1.264 schwarze 793: if (n->flags & NODE_ID)
794: id = html_make_id(n, 1);
1.266 schwarze 795: if (n->flags & NODE_HREF)
1.269 schwarze 796: href = id == NULL ? html_make_id(n, 2) : id;
1.266 schwarze 797: if (href != NULL && htmltags[elemtype].flags & HTML_INPHRASE)
798: ret = print_otag(h, TAG_A, "chR", "permalink", href);
1.264 schwarze 799: t = print_otag(h, elemtype, "ci", cattr, id);
800: if (ret == NULL) {
801: ret = t;
1.266 schwarze 802: if (href != NULL && (nch = n->child) != NULL) {
1.265 schwarze 803: /* man(7) is safe, it tags phrasing content only. */
804: if (n->tok > MDOC_MAX ||
805: htmltags[elemtype].flags & HTML_TOPHRASE)
806: nch = NULL;
807: else /* For mdoc(7), beware of nested blocks. */
808: while (nch != NULL && nch->type == ROFFT_TEXT)
809: nch = nch->next;
810: if (nch == NULL)
1.266 schwarze 811: print_otag(h, TAG_A, "chR", "permalink", href);
1.265 schwarze 812: }
1.264 schwarze 813: }
1.269 schwarze 814: free(id);
1.266 schwarze 815: if (id == NULL)
816: free(href);
1.264 schwarze 817: return ret;
1.14 kristaps 818: }
819:
1.29 kristaps 820: static void
1.184 schwarze 821: print_ctag(struct html *h, struct tag *tag)
1.14 kristaps 822: {
1.196 schwarze 823: int tflags;
1.156 schwarze 824:
1.252 schwarze 825: if (tag->closed == 0) {
826: tag->closed = 1;
827: if (tag == h->metaf)
828: h->metaf = NULL;
829: if (tag == h->tblt)
830: h->tblt = NULL;
831:
832: tflags = htmltags[tag->tag].flags;
833: if (tflags & HTML_INDENT)
834: h->indent--;
835: if (tflags & HTML_NOINDENT)
836: h->noindent--;
837: if (tflags & HTML_NLEND)
838: print_endline(h);
839: print_indent(h);
840: print_byte(h, '<');
841: print_byte(h, '/');
842: print_word(h, htmltags[tag->tag].name);
843: print_byte(h, '>');
844: if (tflags & HTML_NLAFTER)
845: print_endline(h);
846: }
847: if (tag->refcnt == 0) {
848: h->tag = tag->next;
849: free(tag);
850: }
1.14 kristaps 851: }
852:
/*
 * Print the document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char	 doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
859:
860: void
861: print_gen_comment(struct html *h, struct roff_node *n)
862: {
863: int wantblank;
864:
865: print_word(h, "<!-- This is an automatically generated file."
866: " Do not edit.");
867: h->indent = 1;
868: wantblank = 0;
869: while (n != NULL && n->type == ROFFT_COMMENT) {
870: if (strstr(n->string, "-->") == NULL &&
871: (wantblank || *n->string != '\0')) {
872: print_endline(h);
873: print_indent(h);
874: print_word(h, n->string);
875: wantblank = *n->string != '\0';
876: }
877: n = n->next;
878: }
879: if (wantblank)
880: print_endline(h);
881: print_word(h, " -->");
882: print_endline(h);
883: h->indent = 0;
1.1 kristaps 884: }
885:
/*
 * Print a word of text that is not associated with any node,
 * so no permalink is written.
 */
void
print_text(struct html *h, const char *word)
{
	struct roff_node	*no_node = NULL;

	print_tagged_text(h, word, no_node);
}
891:
892: void
893: print_tagged_text(struct html *h, const char *word, struct roff_node *n)
894: {
895: struct tag *t;
896: char *href;
897:
1.260 schwarze 898: /*
899: * Always wrap text in a paragraph unless already contained in
900: * some flow container; never put it directly into a section.
901: */
902:
903: if (h->tag->tag == TAG_SECTION)
904: print_otag(h, TAG_P, "c", "Pp");
905:
906: /* Output whitespace before this text? */
907:
1.197 schwarze 908: if (h->col && (h->flags & HTML_NOSPACE) == 0) {
1.105 kristaps 909: if ( ! (HTML_KEEP & h->flags)) {
910: if (HTML_PREKEEP & h->flags)
911: h->flags |= HTML_KEEP;
1.197 schwarze 912: print_endword(h);
1.105 kristaps 913: } else
1.216 schwarze 914: print_word(h, " ");
1.105 kristaps 915: }
1.260 schwarze 916:
917: /*
1.268 schwarze 918: * Optionally switch fonts, optionally write a permalink, then
919: * print the text, optionally surrounded by HTML whitespace.
1.260 schwarze 920: */
1.30 kristaps 921:
1.255 schwarze 922: assert(h->metaf == NULL);
923: print_metaf(h);
924: print_indent(h);
1.268 schwarze 925:
1.269 schwarze 926: if (n != NULL && (href = html_make_id(n, 2)) != NULL) {
1.268 schwarze 927: t = print_otag(h, TAG_A, "chR", "permalink", href);
928: free(href);
929: } else
930: t = NULL;
931:
1.195 schwarze 932: if ( ! print_encode(h, word, NULL, 0)) {
1.109 kristaps 933: if ( ! (h->flags & HTML_NONOSPACE))
934: h->flags &= ~HTML_NOSPACE;
1.183 schwarze 935: h->flags &= ~HTML_NONEWLINE;
1.149 kristaps 936: } else
1.183 schwarze 937: h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
1.122 kristaps 938:
1.255 schwarze 939: if (h->metaf != NULL) {
1.122 kristaps 940: print_tagq(h, h->metaf);
941: h->metaf = NULL;
1.268 schwarze 942: } else if (t != NULL)
943: print_tagq(h, t);
1.113 schwarze 944:
945: h->flags &= ~HTML_IGNDELIM;
1.1 kristaps 946: }
1.30 kristaps 947:
1.51 kristaps 948: void
1.30 kristaps 949: print_tagq(struct html *h, const struct tag *until)
950: {
1.252 schwarze 951: struct tag *this, *next;
1.30 kristaps 952:
1.252 schwarze 953: for (this = h->tag; this != NULL; this = next) {
954: next = this == until ? NULL : this->next;
955: print_ctag(h, this);
1.30 kristaps 956: }
957: }
958:
1.250 schwarze 959: /*
960: * Close out all open elements up to but excluding suntil.
961: * Note that a paragraph just inside stays open together with it
962: * because paragraphs include subsequent phrasing content.
963: */
1.51 kristaps 964: void
1.30 kristaps 965: print_stagq(struct html *h, const struct tag *suntil)
966: {
1.252 schwarze 967: struct tag *this, *next;
1.30 kristaps 968:
1.252 schwarze 969: for (this = h->tag; this != NULL; this = next) {
970: next = this->next;
971: if (this == suntil || (next == suntil &&
972: (this->tag == TAG_P || this->tag == TAG_PRE)))
973: break;
974: print_ctag(h, this);
1.30 kristaps 975: }
1.171 kristaps 976: }
977:
1.197 schwarze 978:
979: /***********************************************************************
980: * Low level output functions.
981: * They implement line breaking using a short static buffer.
982: ***********************************************************************/
983:
984: /*
985: * Buffer one HTML output byte.
986: * If the buffer is full, flush and deactivate it and start a new line.
987: * If the buffer is inactive, print directly.
988: */
989: static void
990: print_byte(struct html *h, char c)
991: {
992: if ((h->flags & HTML_BUFFER) == 0) {
993: putchar(c);
994: h->col++;
995: return;
996: }
997:
998: if (h->col + h->bufcol < sizeof(h->buf)) {
999: h->buf[h->bufcol++] = c;
1000: return;
1001: }
1002:
1003: putchar('\n');
1004: h->col = 0;
1005: print_indent(h);
1006: putchar(' ');
1007: putchar(' ');
1008: fwrite(h->buf, h->bufcol, 1, stdout);
1009: putchar(c);
1010: h->col = (h->indent + 1) * 2 + h->bufcol + 1;
1011: h->bufcol = 0;
1012: h->flags &= ~HTML_BUFFER;
1013: }
1014:
1.196 schwarze 1015: /*
1016: * If something was printed on the current output line, end it.
1.197 schwarze 1017: * Not to be called right after print_indent().
1.196 schwarze 1018: */
1.202 schwarze 1019: void
1.197 schwarze 1020: print_endline(struct html *h)
1.196 schwarze 1021: {
1.197 schwarze 1022: if (h->col == 0)
1.196 schwarze 1023: return;
1024:
1.197 schwarze 1025: if (h->bufcol) {
1026: putchar(' ');
1027: fwrite(h->buf, h->bufcol, 1, stdout);
1028: h->bufcol = 0;
1029: }
1.196 schwarze 1030: putchar('\n');
1.197 schwarze 1031: h->col = 0;
1032: h->flags |= HTML_NOSPACE;
1033: h->flags &= ~HTML_BUFFER;
1034: }
1035:
1036: /*
1037: * Flush the HTML output buffer.
1038: * If it is inactive, activate it.
1039: */
1040: static void
1041: print_endword(struct html *h)
1042: {
1043: if (h->noindent) {
1044: print_byte(h, ' ');
1045: return;
1046: }
1047:
1048: if ((h->flags & HTML_BUFFER) == 0) {
1049: h->col++;
1050: h->flags |= HTML_BUFFER;
1051: } else if (h->bufcol) {
1052: putchar(' ');
1053: fwrite(h->buf, h->bufcol, 1, stdout);
1054: h->col += h->bufcol + 1;
1055: }
1056: h->bufcol = 0;
1.196 schwarze 1057: }
1058:
1059: /*
1060: * If at the beginning of a new output line,
1061: * perform indentation and mark the line as containing output.
1062: * Make sure to really produce some output right afterwards,
1063: * but do not use print_otag() for producing it.
1064: */
1065: static void
1.197 schwarze 1066: print_indent(struct html *h)
1.196 schwarze 1067: {
1.197 schwarze 1068: size_t i;
1.196 schwarze 1069:
1.261 schwarze 1070: if (h->col || h->noindent)
1.196 schwarze 1071: return;
1072:
1.261 schwarze 1073: h->col = h->indent * 2;
1074: for (i = 0; i < h->col; i++)
1075: putchar(' ');
1.197 schwarze 1076: }
1077:
/*
 * Hand a string to print_byte() one byte at a time, such that it
 * is printed or buffered depending on the state of the buffer.
 */
static void
print_word(struct html *h, const char *cp)
{
	for (; *cp != '\0'; cp++)
		print_byte(h, *cp);
}
CVSweb