Annotation of mandoc/html.c, Revision 1.275
1.275 ! schwarze 1: /* $Id: html.c,v 1.274 2021/08/10 12:55:03 schwarze Exp $ */
1.1 kristaps 2: /*
1.176 schwarze 3: * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
1.274 schwarze 4: * Copyright (c) 2011-2015, 2017-2021 Ingo Schwarze <schwarze@openbsd.org>
1.1 kristaps 5: *
6: * Permission to use, copy, modify, and distribute this software for any
1.29 kristaps 7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
1.1 kristaps 9: *
1.186 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.29 kristaps 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.186 schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.29 kristaps 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.264 schwarze 17: *
18: * Common functions for mandoc(1) HTML formatters.
19: * For use by individual formatters and by the main program.
1.1 kristaps 20: */
1.92 kristaps 21: #include "config.h"
22:
1.41 kristaps 23: #include <sys/types.h>
1.240 schwarze 24: #include <sys/stat.h>
1.30 kristaps 25:
1.1 kristaps 26: #include <assert.h>
1.68 kristaps 27: #include <ctype.h>
1.76 kristaps 28: #include <stdarg.h>
1.229 schwarze 29: #include <stddef.h>
1.29 kristaps 30: #include <stdio.h>
1.63 kristaps 31: #include <stdint.h>
1.1 kristaps 32: #include <stdlib.h>
1.33 kristaps 33: #include <string.h>
1.45 kristaps 34: #include <unistd.h>
1.1 kristaps 35:
1.210 schwarze 36: #include "mandoc_aux.h"
1.229 schwarze 37: #include "mandoc_ohash.h"
1.100 kristaps 38: #include "mandoc.h"
1.210 schwarze 39: #include "roff.h"
1.58 kristaps 40: #include "out.h"
1.51 kristaps 41: #include "html.h"
1.186 schwarze 42: #include "manconf.h"
1.64 kristaps 43: #include "main.h"
1.63 kristaps 44:
/*
 * Formatting properties of HTML elements.
 * These bits are OR-ed together in the "flags" member of struct htmldata.
 */
#define	HTML_INPHRASE	 0x001	/* Can appear in phrasing context. */
#define	HTML_TOPHRASE	 0x002	/* Establishes phrasing context. */
#define	HTML_NOSTACK	 0x004	/* Does not have an end tag. */
#define	HTML_NLBEFORE	 0x008	/* Output line break before opening. */
#define	HTML_NLBEGIN	 0x010	/* Output line break after opening. */
#define	HTML_NLEND	 0x020	/* Output line break before closing. */
#define	HTML_NLAFTER	 0x040	/* Output line break after closing. */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)
#define	HTML_INDENT	 0x080	/* Indent content by two spaces. */
#define	HTML_NOINDENT	 0x100	/* Exception: never indent content. */

/* Static description of one HTML element type. */
struct htmldata {
	const char	 *name;		/* Element name as written in tags. */
	int		  flags;	/* Bitwise OR of the HTML_* bits above. */
};
1.7 kristaps 61:
1.29 kristaps 62: static const struct htmldata htmltags[TAG_MAX] = {
1.196 schwarze 63: {"html", HTML_NLALL},
64: {"head", HTML_NLALL | HTML_INDENT},
1.257 schwarze 65: {"meta", HTML_NOSTACK | HTML_NLALL},
66: {"link", HTML_NOSTACK | HTML_NLALL},
67: {"style", HTML_NLALL | HTML_INDENT},
68: {"title", HTML_NLAROUND},
1.196 schwarze 69: {"body", HTML_NLALL},
70: {"div", HTML_NLAROUND},
1.253 schwarze 71: {"section", HTML_NLALL},
1.196 schwarze 72: {"table", HTML_NLALL | HTML_INDENT},
73: {"tr", HTML_NLALL | HTML_INDENT},
74: {"td", HTML_NLAROUND},
75: {"li", HTML_NLAROUND | HTML_INDENT},
76: {"ul", HTML_NLALL | HTML_INDENT},
77: {"ol", HTML_NLALL | HTML_INDENT},
78: {"dl", HTML_NLALL | HTML_INDENT},
79: {"dt", HTML_NLAROUND},
80: {"dd", HTML_NLAROUND | HTML_INDENT},
1.257 schwarze 81: {"h1", HTML_TOPHRASE | HTML_NLAROUND},
82: {"h2", HTML_TOPHRASE | HTML_NLAROUND},
83: {"p", HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT},
1.271 schwarze 84: {"pre", HTML_TOPHRASE | HTML_NLAROUND | HTML_NOINDENT},
1.257 schwarze 85: {"a", HTML_INPHRASE | HTML_TOPHRASE},
86: {"b", HTML_INPHRASE | HTML_TOPHRASE},
87: {"cite", HTML_INPHRASE | HTML_TOPHRASE},
88: {"code", HTML_INPHRASE | HTML_TOPHRASE},
89: {"i", HTML_INPHRASE | HTML_TOPHRASE},
90: {"small", HTML_INPHRASE | HTML_TOPHRASE},
91: {"span", HTML_INPHRASE | HTML_TOPHRASE},
92: {"var", HTML_INPHRASE | HTML_TOPHRASE},
93: {"br", HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL},
1.275 ! schwarze 94: {"hr", HTML_INPHRASE | HTML_NOSTACK},
1.263 schwarze 95: {"mark", HTML_INPHRASE },
1.257 schwarze 96: {"math", HTML_INPHRASE | HTML_NLALL | HTML_INDENT},
1.196 schwarze 97: {"mrow", 0},
98: {"mi", 0},
1.215 schwarze 99: {"mn", 0},
1.196 schwarze 100: {"mo", 0},
101: {"msup", 0},
102: {"msub", 0},
103: {"msubsup", 0},
104: {"mfrac", 0},
105: {"msqrt", 0},
106: {"mfenced", 0},
107: {"mtable", 0},
108: {"mtr", 0},
109: {"mtd", 0},
110: {"munderover", 0},
111: {"munder", 0},
112: {"mover", 0},
1.90 kristaps 113: };
114:
1.229 schwarze 115: /* Avoid duplicate HTML id= attributes. */
1.269 schwarze 116:
117: struct id_entry {
118: int ord; /* Ordinal number of the latest occurrence. */
119: char id[]; /* The id= attribute without any ordinal suffix. */
120: };
1.229 schwarze 121: static struct ohash id_unique;
122:
/* State management. */
static	void	 html_reset_internal(struct html *h);

/* Low level, line-breaking output. */
static	void	 print_byte(struct html *h, char c);
static	void	 print_endword(struct html *h);
static	void	 print_indent(struct html *h);
static	void	 print_word(struct html *h, const char *cp);

/* Elements, attributes and text encoding. */
static	void	 print_ctag(struct html *h, struct tag *tag);
static	int	 print_escape(struct html *h, char c);
static	int	 print_encode(struct html *h, const char *p,
			const char *pend, int norecurse);
static	void	 print_href(struct html *h, const char *name,
			const char *sec, int man);
static	void	 print_metaf(struct html *h);
1.82 kristaps 134:
1.156 schwarze 135:
1.180 schwarze 136: void *
1.191 schwarze 137: html_alloc(const struct manoutput *outopts)
1.10 kristaps 138: {
1.30 kristaps 139: struct html *h;
140:
1.128 kristaps 141: h = mandoc_calloc(1, sizeof(struct html));
1.10 kristaps 142:
1.204 schwarze 143: h->tag = NULL;
1.272 schwarze 144: h->metac = h->metal = ESCAPE_FONTROMAN;
1.186 schwarze 145: h->style = outopts->style;
1.240 schwarze 146: if ((h->base_man1 = outopts->man) == NULL)
147: h->base_man2 = NULL;
148: else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
149: *h->base_man2++ = '\0';
1.186 schwarze 150: h->base_includes = outopts->includes;
151: if (outopts->fragment)
152: h->oflags |= HTML_FRAGMENT;
1.241 schwarze 153: if (outopts->toc)
154: h->oflags |= HTML_TOC;
1.43 kristaps 155:
1.269 schwarze 156: mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
1.229 schwarze 157:
1.188 schwarze 158: return h;
1.29 kristaps 159: }
1.10 kristaps 160:
1.254 schwarze 161: static void
162: html_reset_internal(struct html *h)
1.29 kristaps 163: {
1.30 kristaps 164: struct tag *tag;
1.269 schwarze 165: struct id_entry *entry;
1.229 schwarze 166: unsigned int slot;
1.30 kristaps 167:
1.204 schwarze 168: while ((tag = h->tag) != NULL) {
169: h->tag = tag->next;
1.30 kristaps 170: free(tag);
171: }
1.269 schwarze 172: entry = ohash_first(&id_unique, &slot);
173: while (entry != NULL) {
174: free(entry);
175: entry = ohash_next(&id_unique, &slot);
1.229 schwarze 176: }
177: ohash_delete(&id_unique);
1.254 schwarze 178: }
179:
180: void
181: html_reset(void *p)
182: {
183: html_reset_internal(p);
1.269 schwarze 184: mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
1.254 schwarze 185: }
186:
/*
 * Discard per-document state, then release the formatter state itself.
 * The argument is the pointer returned from html_alloc().
 */
void
html_free(void *p)
{
	struct html	*h = p;

	html_reset_internal(h);
	free(h);
}
1.2 kristaps 193:
1.51 kristaps 194: void
1.29 kristaps 195: print_gen_head(struct html *h)
196: {
1.165 kristaps 197: struct tag *t;
1.41 kristaps 198:
1.194 schwarze 199: print_otag(h, TAG_META, "?", "charset", "utf-8");
1.273 schwarze 200: print_otag(h, TAG_META, "??", "name", "viewport",
201: "content", "width=device-width, initial-scale=1.0");
1.222 schwarze 202: if (h->style != NULL) {
203: print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
204: h->style, "type", "text/css", "media", "all");
205: return;
206: }
1.165 kristaps 207:
1.168 kristaps 208: /*
1.222 schwarze 209: * Print a minimal embedded style sheet.
1.168 kristaps 210: */
1.196 schwarze 211:
1.194 schwarze 212: t = print_otag(h, TAG_STYLE, "");
1.196 schwarze 213: print_text(h, "table.head, table.foot { width: 100%; }");
1.197 schwarze 214: print_endline(h);
1.196 schwarze 215: print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
1.197 schwarze 216: print_endline(h);
1.196 schwarze 217: print_text(h, "td.head-vol { text-align: center; }");
1.197 schwarze 218: print_endline(h);
1.256 schwarze 219: print_text(h, ".Nd, .Bf, .Op { display: inline; }");
1.225 schwarze 220: print_endline(h);
1.256 schwarze 221: print_text(h, ".Pa, .Ad { font-style: italic; }");
1.226 schwarze 222: print_endline(h);
1.256 schwarze 223: print_text(h, ".Ms { font-weight: bold; }");
1.228 schwarze 224: print_endline(h);
1.256 schwarze 225: print_text(h, ".Bl-diag ");
1.224 schwarze 226: print_byte(h, '>');
227: print_text(h, " dt { font-weight: bold; }");
1.223 schwarze 228: print_endline(h);
1.256 schwarze 229: print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd "
230: "{ font-weight: bold; font-family: inherit; }");
1.165 kristaps 231: print_tagq(h, t);
1.4 kristaps 232: }
233:
1.255 schwarze 234: int
235: html_setfont(struct html *h, enum mandoc_esc font)
1.88 kristaps 236: {
1.255 schwarze 237: switch (font) {
1.156 schwarze 238: case ESCAPE_FONTPREV:
1.90 kristaps 239: font = h->metal;
1.88 kristaps 240: break;
1.156 schwarze 241: case ESCAPE_FONTITALIC:
242: case ESCAPE_FONTBOLD:
243: case ESCAPE_FONTBI:
1.255 schwarze 244: case ESCAPE_FONTROMAN:
1.274 schwarze 245: case ESCAPE_FONTCR:
246: case ESCAPE_FONTCB:
247: case ESCAPE_FONTCI:
1.242 schwarze 248: break;
1.156 schwarze 249: case ESCAPE_FONT:
1.255 schwarze 250: font = ESCAPE_FONTROMAN;
1.88 kristaps 251: break;
252: default:
1.255 schwarze 253: return 0;
1.88 kristaps 254: }
1.255 schwarze 255: h->metal = h->metac;
256: h->metac = font;
257: return 1;
258: }
1.88 kristaps 259:
1.255 schwarze 260: static void
261: print_metaf(struct html *h)
262: {
1.122 kristaps 263: if (h->metaf) {
264: print_tagq(h, h->metaf);
265: h->metaf = NULL;
266: }
1.255 schwarze 267: switch (h->metac) {
268: case ESCAPE_FONTITALIC:
1.194 schwarze 269: h->metaf = print_otag(h, TAG_I, "");
1.152 schwarze 270: break;
1.255 schwarze 271: case ESCAPE_FONTBOLD:
1.194 schwarze 272: h->metaf = print_otag(h, TAG_B, "");
1.152 schwarze 273: break;
1.255 schwarze 274: case ESCAPE_FONTBI:
1.194 schwarze 275: h->metaf = print_otag(h, TAG_B, "");
276: print_otag(h, TAG_I, "");
1.152 schwarze 277: break;
1.274 schwarze 278: case ESCAPE_FONTCR:
1.242 schwarze 279: h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
280: break;
1.274 schwarze 281: case ESCAPE_FONTCB:
282: h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
283: print_otag(h, TAG_B, "");
284: break;
285: case ESCAPE_FONTCI:
286: h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
287: print_otag(h, TAG_I, "");
288: break;
1.152 schwarze 289: default:
290: break;
291: }
1.248 schwarze 292: }
293:
1.249 schwarze 294: void
295: html_close_paragraph(struct html *h)
296: {
1.259 schwarze 297: struct tag *this, *next;
298: int flags;
1.249 schwarze 299:
1.259 schwarze 300: this = h->tag;
301: for (;;) {
302: next = this->next;
303: flags = htmltags[this->tag].flags;
304: if (flags & (HTML_INPHRASE | HTML_TOPHRASE))
305: print_ctag(h, this);
306: if ((flags & HTML_INPHRASE) == 0)
1.249 schwarze 307: break;
1.259 schwarze 308: this = next;
1.249 schwarze 309: }
310: }
311:
1.248 schwarze 312: /*
313: * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
314: * TOKEN_NONE does not switch. The old mode is returned.
315: */
316: enum roff_tok
317: html_fillmode(struct html *h, enum roff_tok want)
318: {
319: struct tag *t;
320: enum roff_tok had;
321:
322: for (t = h->tag; t != NULL; t = t->next)
323: if (t->tag == TAG_PRE)
324: break;
325:
326: had = t == NULL ? ROFF_fi : ROFF_nf;
327:
328: if (want != had) {
329: switch (want) {
330: case ROFF_fi:
331: print_tagq(h, t);
332: break;
333: case ROFF_nf:
1.249 schwarze 334: html_close_paragraph(h);
1.248 schwarze 335: print_otag(h, TAG_PRE, "");
336: break;
337: case TOKEN_NONE:
338: break;
339: default:
340: abort();
341: }
342: }
343: return had;
1.210 schwarze 344: }
345:
1.264 schwarze 346: /*
347: * Allocate a string to be used for the "id=" attribute of an HTML
348: * element and/or as a segment identifier for a URI in an <a> element.
349: * The function may fail and return NULL if the node lacks text data
350: * to create the attribute from.
1.269 schwarze 351: * The caller is responsible for free(3)ing the returned string.
352: *
1.264 schwarze 353: * If the "unique" argument is non-zero, the "id_unique" ohash table
1.269 schwarze 354: * is used for de-duplication. If the "unique" argument is 1,
355: * it is the first time the function is called for this tag and
356: * location, so if an ordinal suffix is needed, it is incremented.
357: * If the "unique" argument is 2, it is the second time the function
358: * is called for this tag and location, so the ordinal suffix
359: * remains unchanged.
1.264 schwarze 360: */
1.210 schwarze 361: char *
1.229 schwarze 362: html_make_id(const struct roff_node *n, int unique)
1.210 schwarze 363: {
364: const struct roff_node *nch;
1.269 schwarze 365: struct id_entry *entry;
366: char *buf, *cp;
367: size_t len;
1.229 schwarze 368: unsigned int slot;
1.210 schwarze 369:
1.267 schwarze 370: if (n->tag != NULL)
371: buf = mandoc_strdup(n->tag);
1.264 schwarze 372: else {
373: switch (n->tok) {
374: case MDOC_Sh:
375: case MDOC_Ss:
376: case MDOC_Sx:
377: case MAN_SH:
378: case MAN_SS:
379: for (nch = n->child; nch != NULL; nch = nch->next)
380: if (nch->type != ROFFT_TEXT)
381: return NULL;
382: buf = NULL;
383: deroff(&buf, n);
384: if (buf == NULL)
385: return NULL;
386: break;
387: default:
1.265 schwarze 388: if (n->child == NULL || n->child->type != ROFFT_TEXT)
1.264 schwarze 389: return NULL;
390: buf = mandoc_strdup(n->child->string);
391: break;
392: }
393: }
1.210 schwarze 394:
1.230 schwarze 395: /*
396: * In ID attributes, only use ASCII characters that are
397: * permitted in URL-fragment strings according to the
398: * explicit list at:
399: * https://url.spec.whatwg.org/#url-fragment-string
1.270 schwarze 400: * In addition, reserve '~' for ordinal suffixes.
1.230 schwarze 401: */
1.210 schwarze 402:
403: for (cp = buf; *cp != '\0'; cp++)
1.230 schwarze 404: if (isalnum((unsigned char)*cp) == 0 &&
1.270 schwarze 405: strchr("!$&'()*+,-./:;=?@_", *cp) == NULL)
1.210 schwarze 406: *cp = '_';
407:
1.229 schwarze 408: if (unique == 0)
409: return buf;
410:
411: /* Avoid duplicate HTML id= attributes. */
412:
413: slot = ohash_qlookup(&id_unique, buf);
1.269 schwarze 414: if ((entry = ohash_find(&id_unique, slot)) == NULL) {
415: len = strlen(buf) + 1;
416: entry = mandoc_malloc(sizeof(*entry) + len);
417: entry->ord = 1;
418: memcpy(entry->id, buf, len);
419: ohash_insert(&id_unique, slot, entry);
420: } else if (unique == 1)
421: entry->ord++;
422:
423: if (entry->ord > 1) {
424: cp = buf;
1.270 schwarze 425: mandoc_asprintf(&buf, "%s~%d", cp, entry->ord);
1.269 schwarze 426: free(cp);
1.229 schwarze 427: }
1.210 schwarze 428: return buf;
1.88 kristaps 429: }
430:
1.85 kristaps 431: static int
1.197 schwarze 432: print_escape(struct html *h, char c)
1.159 schwarze 433: {
434:
435: switch (c) {
436: case '<':
1.197 schwarze 437: print_word(h, "<");
1.159 schwarze 438: break;
439: case '>':
1.197 schwarze 440: print_word(h, ">");
1.159 schwarze 441: break;
442: case '&':
1.197 schwarze 443: print_word(h, "&");
1.159 schwarze 444: break;
445: case '"':
1.197 schwarze 446: print_word(h, """);
1.159 schwarze 447: break;
448: case ASCII_NBRSP:
1.197 schwarze 449: print_word(h, " ");
1.159 schwarze 450: break;
451: case ASCII_HYPH:
1.197 schwarze 452: print_byte(h, '-');
1.189 schwarze 453: break;
1.159 schwarze 454: case ASCII_BREAK:
455: break;
456: default:
1.188 schwarze 457: return 0;
1.159 schwarze 458: }
1.188 schwarze 459: return 1;
1.159 schwarze 460: }
461:
462: static int
1.195 schwarze 463: print_encode(struct html *h, const char *p, const char *pend, int norecurse)
1.29 kristaps 464: {
1.197 schwarze 465: char numbuf[16];
1.214 schwarze 466: const char *seq;
1.77 kristaps 467: size_t sz;
1.214 schwarze 468: int c, len, breakline, nospace;
1.132 kristaps 469: enum mandoc_esc esc;
1.214 schwarze 470: static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
1.154 schwarze 471: ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
1.14 kristaps 472:
1.195 schwarze 473: if (pend == NULL)
474: pend = strchr(p, '\0');
475:
1.214 schwarze 476: breakline = 0;
1.85 kristaps 477: nospace = 0;
478:
1.195 schwarze 479: while (p < pend) {
1.151 schwarze 480: if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
481: h->flags &= ~HTML_SKIPCHAR;
482: p++;
483: continue;
484: }
485:
1.197 schwarze 486: for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
1.214 schwarze 487: print_byte(h, *p);
488:
489: if (breakline &&
490: (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
1.245 schwarze 491: print_otag(h, TAG_BR, "");
1.214 schwarze 492: breakline = 0;
493: while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
494: p++;
495: continue;
496: }
1.77 kristaps 497:
1.195 schwarze 498: if (p >= pend)
1.132 kristaps 499: break;
500:
1.214 schwarze 501: if (*p == ' ') {
502: print_endword(h);
503: p++;
504: continue;
505: }
506:
1.197 schwarze 507: if (print_escape(h, *p++))
1.154 schwarze 508: continue;
1.77 kristaps 509:
1.132 kristaps 510: esc = mandoc_escape(&p, &seq, &len);
511: switch (esc) {
1.156 schwarze 512: case ESCAPE_FONT:
513: case ESCAPE_FONTPREV:
514: case ESCAPE_FONTBOLD:
515: case ESCAPE_FONTITALIC:
516: case ESCAPE_FONTBI:
517: case ESCAPE_FONTROMAN:
1.274 schwarze 518: case ESCAPE_FONTCR:
519: case ESCAPE_FONTCB:
520: case ESCAPE_FONTCI:
1.243 schwarze 521: if (0 == norecurse) {
522: h->flags |= HTML_NOSPACE;
1.255 schwarze 523: if (html_setfont(h, esc))
524: print_metaf(h);
1.243 schwarze 525: h->flags &= ~HTML_NOSPACE;
526: }
1.151 schwarze 527: continue;
1.156 schwarze 528: case ESCAPE_SKIPCHAR:
1.151 schwarze 529: h->flags |= HTML_SKIPCHAR;
530: continue;
1.246 schwarze 531: case ESCAPE_ERROR:
532: continue;
1.151 schwarze 533: default:
534: break;
535: }
536:
537: if (h->flags & HTML_SKIPCHAR) {
538: h->flags &= ~HTML_SKIPCHAR;
539: continue;
540: }
541:
542: switch (esc) {
1.156 schwarze 543: case ESCAPE_UNICODE:
1.159 schwarze 544: /* Skip past "u" header. */
1.144 kristaps 545: c = mchars_num2uc(seq + 1, len - 1);
546: break;
1.156 schwarze 547: case ESCAPE_NUMBERED:
1.141 kristaps 548: c = mchars_num2char(seq, len);
1.181 schwarze 549: if (c < 0)
550: continue;
1.82 kristaps 551: break;
1.156 schwarze 552: case ESCAPE_SPECIAL:
1.191 schwarze 553: c = mchars_spec2cp(seq, len);
1.181 schwarze 554: if (c <= 0)
555: continue;
1.246 schwarze 556: break;
557: case ESCAPE_UNDEF:
558: c = *seq;
1.132 kristaps 559: break;
1.239 schwarze 560: case ESCAPE_DEVICE:
561: print_word(h, "html");
562: continue;
1.214 schwarze 563: case ESCAPE_BREAK:
564: breakline = 1;
565: continue;
1.156 schwarze 566: case ESCAPE_NOSPACE:
1.132 kristaps 567: if ('\0' == *p)
568: nospace = 1;
1.179 schwarze 569: continue;
1.185 schwarze 570: case ESCAPE_OVERSTRIKE:
571: if (len == 0)
572: continue;
573: c = seq[len - 1];
574: break;
1.82 kristaps 575: default:
1.179 schwarze 576: continue;
1.82 kristaps 577: }
1.181 schwarze 578: if ((c < 0x20 && c != 0x09) ||
579: (c > 0x7E && c < 0xA0))
1.179 schwarze 580: c = 0xFFFD;
1.197 schwarze 581: if (c > 0x7E) {
1.216 schwarze 582: (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
1.197 schwarze 583: print_word(h, numbuf);
584: } else if (print_escape(h, c) == 0)
585: print_byte(h, c);
1.32 kristaps 586: }
1.85 kristaps 587:
1.188 schwarze 588: return nospace;
1.14 kristaps 589: }
590:
1.94 kristaps 591: static void
1.195 schwarze 592: print_href(struct html *h, const char *name, const char *sec, int man)
1.94 kristaps 593: {
1.240 schwarze 594: struct stat sb;
1.195 schwarze 595: const char *p, *pp;
1.240 schwarze 596: char *filename;
597:
598: if (man) {
599: pp = h->base_man1;
600: if (h->base_man2 != NULL) {
601: mandoc_asprintf(&filename, "%s.%s", name, sec);
602: if (stat(filename, &sb) == -1)
603: pp = h->base_man2;
604: free(filename);
605: }
606: } else
607: pp = h->base_includes;
1.195 schwarze 608:
609: while ((p = strchr(pp, '%')) != NULL) {
610: print_encode(h, pp, p, 1);
611: if (man && p[1] == 'S') {
612: if (sec == NULL)
1.197 schwarze 613: print_byte(h, '1');
1.195 schwarze 614: else
615: print_encode(h, sec, NULL, 1);
616: } else if ((man && p[1] == 'N') ||
617: (man == 0 && p[1] == 'I'))
618: print_encode(h, name, NULL, 1);
619: else
620: print_encode(h, p, p + 2, 1);
621: pp = p + 2;
622: }
623: if (*pp != '\0')
624: print_encode(h, pp, NULL, 1);
1.94 kristaps 625: }
626:
1.51 kristaps 627: struct tag *
1.194 schwarze 628: print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
1.14 kristaps 629: {
1.194 schwarze 630: va_list ap;
1.30 kristaps 631: struct tag *t;
1.195 schwarze 632: const char *attr;
1.203 schwarze 633: char *arg1, *arg2;
1.244 schwarze 634: int style_written, tflags;
1.196 schwarze 635:
636: tflags = htmltags[tag].flags;
1.30 kristaps 637:
1.257 schwarze 638: /* Flow content is not allowed in phrasing context. */
639:
640: if ((tflags & HTML_INPHRASE) == 0) {
641: for (t = h->tag; t != NULL; t = t->next) {
642: if (t->closed)
643: continue;
644: assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0);
645: break;
646: }
1.260 schwarze 647:
648: /*
649: * Always wrap phrasing elements in a paragraph
650: * unless already contained in some flow container;
651: * never put them directly into a section.
652: */
653:
654: } else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION)
655: print_otag(h, TAG_P, "c", "Pp");
1.257 schwarze 656:
1.204 schwarze 657: /* Push this tag onto the stack of open scopes. */
1.94 kristaps 658:
1.196 schwarze 659: if ((tflags & HTML_NOSTACK) == 0) {
1.128 kristaps 660: t = mandoc_malloc(sizeof(struct tag));
1.30 kristaps 661: t->tag = tag;
1.204 schwarze 662: t->next = h->tag;
1.252 schwarze 663: t->refcnt = 0;
664: t->closed = 0;
1.204 schwarze 665: h->tag = t;
1.30 kristaps 666: } else
667: t = NULL;
1.29 kristaps 668:
1.196 schwarze 669: if (tflags & HTML_NLBEFORE)
1.197 schwarze 670: print_endline(h);
671: if (h->col == 0)
672: print_indent(h);
1.196 schwarze 673: else if ((h->flags & HTML_NOSPACE) == 0) {
674: if (h->flags & HTML_KEEP)
1.216 schwarze 675: print_word(h, " ");
1.196 schwarze 676: else {
677: if (h->flags & HTML_PREKEEP)
678: h->flags |= HTML_KEEP;
1.197 schwarze 679: print_endword(h);
1.105 kristaps 680: }
1.196 schwarze 681: }
1.29 kristaps 682:
1.109 kristaps 683: if ( ! (h->flags & HTML_NONOSPACE))
684: h->flags &= ~HTML_NOSPACE;
1.110 kristaps 685: else
686: h->flags |= HTML_NOSPACE;
1.109 kristaps 687:
1.94 kristaps 688: /* Print out the tag name and attributes. */
689:
1.197 schwarze 690: print_byte(h, '<');
691: print_word(h, htmltags[tag].name);
1.194 schwarze 692:
693: va_start(ap, fmt);
694:
1.244 schwarze 695: while (*fmt != '\0' && *fmt != 's') {
1.203 schwarze 696:
1.238 schwarze 697: /* Parse attributes and arguments. */
1.203 schwarze 698:
699: arg1 = va_arg(ap, char *);
1.238 schwarze 700: arg2 = NULL;
1.194 schwarze 701: switch (*fmt++) {
702: case 'c':
1.195 schwarze 703: attr = "class";
1.194 schwarze 704: break;
705: case 'h':
1.195 schwarze 706: attr = "href";
1.194 schwarze 707: break;
708: case 'i':
1.195 schwarze 709: attr = "id";
1.194 schwarze 710: break;
711: case '?':
1.203 schwarze 712: attr = arg1;
713: arg1 = va_arg(ap, char *);
1.194 schwarze 714: break;
715: default:
716: abort();
717: }
1.203 schwarze 718: if (*fmt == 'M')
719: arg2 = va_arg(ap, char *);
720: if (arg1 == NULL)
721: continue;
722:
1.238 schwarze 723: /* Print the attributes. */
1.203 schwarze 724:
1.197 schwarze 725: print_byte(h, ' ');
726: print_word(h, attr);
727: print_byte(h, '=');
728: print_byte(h, '"');
1.195 schwarze 729: switch (*fmt) {
1.208 schwarze 730: case 'I':
731: print_href(h, arg1, NULL, 0);
732: fmt++;
733: break;
1.195 schwarze 734: case 'M':
1.203 schwarze 735: print_href(h, arg1, arg2, 1);
1.195 schwarze 736: fmt++;
737: break;
1.208 schwarze 738: case 'R':
739: print_byte(h, '#');
740: print_encode(h, arg1, NULL, 1);
1.195 schwarze 741: fmt++;
1.208 schwarze 742: break;
1.195 schwarze 743: default:
1.244 schwarze 744: print_encode(h, arg1, NULL, 1);
1.195 schwarze 745: break;
746: }
1.197 schwarze 747: print_byte(h, '"');
1.194 schwarze 748: }
1.244 schwarze 749:
750: style_written = 0;
751: while (*fmt++ == 's') {
752: arg1 = va_arg(ap, char *);
753: arg2 = va_arg(ap, char *);
754: if (arg2 == NULL)
755: continue;
756: print_byte(h, ' ');
757: if (style_written == 0) {
758: print_word(h, "style=\"");
759: style_written = 1;
760: }
761: print_word(h, arg1);
762: print_byte(h, ':');
763: print_byte(h, ' ');
764: print_word(h, arg2);
765: print_byte(h, ';');
766: }
767: if (style_written)
768: print_byte(h, '"');
769:
1.194 schwarze 770: va_end(ap);
1.94 kristaps 771:
1.172 kristaps 772: /* Accommodate for "well-formed" singleton escaping. */
1.94 kristaps 773:
1.257 schwarze 774: if (htmltags[tag].flags & HTML_NOSTACK)
1.197 schwarze 775: print_byte(h, '/');
1.93 kristaps 776:
1.197 schwarze 777: print_byte(h, '>');
1.14 kristaps 778:
1.196 schwarze 779: if (tflags & HTML_NLBEGIN)
1.197 schwarze 780: print_endline(h);
1.196 schwarze 781: else
782: h->flags |= HTML_NOSPACE;
1.117 kristaps 783:
1.196 schwarze 784: if (tflags & HTML_INDENT)
785: h->indent++;
786: if (tflags & HTML_NOINDENT)
787: h->noindent++;
1.117 kristaps 788:
1.188 schwarze 789: return t;
1.264 schwarze 790: }
791:
792: /*
793: * Print an element with an optional "id=" attribute.
1.265 schwarze 794: * If the element has phrasing content and an "id=" attribute,
795: * also add a permalink: outside if it can be in phrasing context,
796: * inside otherwise.
1.264 schwarze 797: */
798: struct tag *
799: print_otag_id(struct html *h, enum htmltag elemtype, const char *cattr,
800: struct roff_node *n)
801: {
1.265 schwarze 802: struct roff_node *nch;
1.264 schwarze 803: struct tag *ret, *t;
1.266 schwarze 804: char *id, *href;
1.264 schwarze 805:
806: ret = NULL;
1.266 schwarze 807: id = href = NULL;
1.264 schwarze 808: if (n->flags & NODE_ID)
809: id = html_make_id(n, 1);
1.266 schwarze 810: if (n->flags & NODE_HREF)
1.269 schwarze 811: href = id == NULL ? html_make_id(n, 2) : id;
1.266 schwarze 812: if (href != NULL && htmltags[elemtype].flags & HTML_INPHRASE)
813: ret = print_otag(h, TAG_A, "chR", "permalink", href);
1.264 schwarze 814: t = print_otag(h, elemtype, "ci", cattr, id);
815: if (ret == NULL) {
816: ret = t;
1.266 schwarze 817: if (href != NULL && (nch = n->child) != NULL) {
1.265 schwarze 818: /* man(7) is safe, it tags phrasing content only. */
819: if (n->tok > MDOC_MAX ||
820: htmltags[elemtype].flags & HTML_TOPHRASE)
821: nch = NULL;
822: else /* For mdoc(7), beware of nested blocks. */
823: while (nch != NULL && nch->type == ROFFT_TEXT)
824: nch = nch->next;
825: if (nch == NULL)
1.266 schwarze 826: print_otag(h, TAG_A, "chR", "permalink", href);
1.265 schwarze 827: }
1.264 schwarze 828: }
1.269 schwarze 829: free(id);
1.266 schwarze 830: if (id == NULL)
831: free(href);
1.264 schwarze 832: return ret;
1.14 kristaps 833: }
834:
1.29 kristaps 835: static void
1.184 schwarze 836: print_ctag(struct html *h, struct tag *tag)
1.14 kristaps 837: {
1.196 schwarze 838: int tflags;
1.156 schwarze 839:
1.252 schwarze 840: if (tag->closed == 0) {
841: tag->closed = 1;
842: if (tag == h->metaf)
843: h->metaf = NULL;
844: if (tag == h->tblt)
845: h->tblt = NULL;
846:
847: tflags = htmltags[tag->tag].flags;
848: if (tflags & HTML_INDENT)
849: h->indent--;
850: if (tflags & HTML_NOINDENT)
851: h->noindent--;
852: if (tflags & HTML_NLEND)
853: print_endline(h);
854: print_indent(h);
855: print_byte(h, '<');
856: print_byte(h, '/');
857: print_word(h, htmltags[tag->tag].name);
858: print_byte(h, '>');
859: if (tflags & HTML_NLAFTER)
860: print_endline(h);
861: }
862: if (tag->refcnt == 0) {
863: h->tag = tag->next;
864: free(tag);
865: }
1.14 kristaps 866: }
867:
/*
 * Print the document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char	 doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
874:
875: void
876: print_gen_comment(struct html *h, struct roff_node *n)
877: {
878: int wantblank;
879:
880: print_word(h, "<!-- This is an automatically generated file."
881: " Do not edit.");
882: h->indent = 1;
883: wantblank = 0;
884: while (n != NULL && n->type == ROFFT_COMMENT) {
885: if (strstr(n->string, "-->") == NULL &&
886: (wantblank || *n->string != '\0')) {
887: print_endline(h);
888: print_indent(h);
889: print_word(h, n->string);
890: wantblank = *n->string != '\0';
891: }
892: n = n->next;
893: }
894: if (wantblank)
895: print_endline(h);
896: print_word(h, " -->");
897: print_endline(h);
898: h->indent = 0;
1.1 kristaps 899: }
900:
/*
 * Print a word of text that is not associated with any syntax
 * tree node, and hence never gets a permalink.
 */
void
print_text(struct html *h, const char *word)
{
	struct roff_node	*none = NULL;

	print_tagged_text(h, word, none);
}
906:
907: void
908: print_tagged_text(struct html *h, const char *word, struct roff_node *n)
909: {
910: struct tag *t;
911: char *href;
912:
1.260 schwarze 913: /*
914: * Always wrap text in a paragraph unless already contained in
915: * some flow container; never put it directly into a section.
916: */
917:
918: if (h->tag->tag == TAG_SECTION)
919: print_otag(h, TAG_P, "c", "Pp");
920:
921: /* Output whitespace before this text? */
922:
1.197 schwarze 923: if (h->col && (h->flags & HTML_NOSPACE) == 0) {
1.105 kristaps 924: if ( ! (HTML_KEEP & h->flags)) {
925: if (HTML_PREKEEP & h->flags)
926: h->flags |= HTML_KEEP;
1.197 schwarze 927: print_endword(h);
1.105 kristaps 928: } else
1.216 schwarze 929: print_word(h, " ");
1.105 kristaps 930: }
1.260 schwarze 931:
932: /*
1.268 schwarze 933: * Optionally switch fonts, optionally write a permalink, then
934: * print the text, optionally surrounded by HTML whitespace.
1.260 schwarze 935: */
1.30 kristaps 936:
1.255 schwarze 937: assert(h->metaf == NULL);
938: print_metaf(h);
939: print_indent(h);
1.268 schwarze 940:
1.269 schwarze 941: if (n != NULL && (href = html_make_id(n, 2)) != NULL) {
1.268 schwarze 942: t = print_otag(h, TAG_A, "chR", "permalink", href);
943: free(href);
944: } else
945: t = NULL;
946:
1.195 schwarze 947: if ( ! print_encode(h, word, NULL, 0)) {
1.109 kristaps 948: if ( ! (h->flags & HTML_NONOSPACE))
949: h->flags &= ~HTML_NOSPACE;
1.183 schwarze 950: h->flags &= ~HTML_NONEWLINE;
1.149 kristaps 951: } else
1.183 schwarze 952: h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
1.122 kristaps 953:
1.255 schwarze 954: if (h->metaf != NULL) {
1.122 kristaps 955: print_tagq(h, h->metaf);
956: h->metaf = NULL;
1.268 schwarze 957: } else if (t != NULL)
958: print_tagq(h, t);
1.113 schwarze 959:
960: h->flags &= ~HTML_IGNDELIM;
1.1 kristaps 961: }
1.30 kristaps 962:
1.51 kristaps 963: void
1.30 kristaps 964: print_tagq(struct html *h, const struct tag *until)
965: {
1.252 schwarze 966: struct tag *this, *next;
1.30 kristaps 967:
1.252 schwarze 968: for (this = h->tag; this != NULL; this = next) {
969: next = this == until ? NULL : this->next;
970: print_ctag(h, this);
1.30 kristaps 971: }
972: }
973:
1.250 schwarze 974: /*
975: * Close out all open elements up to but excluding suntil.
976: * Note that a paragraph just inside stays open together with it
977: * because paragraphs include subsequent phrasing content.
978: */
1.51 kristaps 979: void
1.30 kristaps 980: print_stagq(struct html *h, const struct tag *suntil)
981: {
1.252 schwarze 982: struct tag *this, *next;
1.30 kristaps 983:
1.252 schwarze 984: for (this = h->tag; this != NULL; this = next) {
985: next = this->next;
986: if (this == suntil || (next == suntil &&
987: (this->tag == TAG_P || this->tag == TAG_PRE)))
988: break;
989: print_ctag(h, this);
1.30 kristaps 990: }
1.171 kristaps 991: }
992:
1.197 schwarze 993:
994: /***********************************************************************
995: * Low level output functions.
996: * They implement line breaking using a short static buffer.
997: ***********************************************************************/
998:
999: /*
1000: * Buffer one HTML output byte.
1001: * If the buffer is full, flush and deactivate it and start a new line.
1002: * If the buffer is inactive, print directly.
1003: */
1004: static void
1005: print_byte(struct html *h, char c)
1006: {
1007: if ((h->flags & HTML_BUFFER) == 0) {
1008: putchar(c);
1009: h->col++;
1010: return;
1011: }
1012:
1013: if (h->col + h->bufcol < sizeof(h->buf)) {
1014: h->buf[h->bufcol++] = c;
1015: return;
1016: }
1017:
1018: putchar('\n');
1019: h->col = 0;
1020: print_indent(h);
1021: putchar(' ');
1022: putchar(' ');
1023: fwrite(h->buf, h->bufcol, 1, stdout);
1024: putchar(c);
1025: h->col = (h->indent + 1) * 2 + h->bufcol + 1;
1026: h->bufcol = 0;
1027: h->flags &= ~HTML_BUFFER;
1028: }
1029:
1.196 schwarze 1030: /*
1031: * If something was printed on the current output line, end it.
1.197 schwarze 1032: * Not to be called right after print_indent().
1.196 schwarze 1033: */
1.202 schwarze 1034: void
1.197 schwarze 1035: print_endline(struct html *h)
1.196 schwarze 1036: {
1.197 schwarze 1037: if (h->col == 0)
1.196 schwarze 1038: return;
1039:
1.197 schwarze 1040: if (h->bufcol) {
1041: putchar(' ');
1042: fwrite(h->buf, h->bufcol, 1, stdout);
1043: h->bufcol = 0;
1044: }
1.196 schwarze 1045: putchar('\n');
1.197 schwarze 1046: h->col = 0;
1047: h->flags |= HTML_NOSPACE;
1048: h->flags &= ~HTML_BUFFER;
1049: }
1050:
1051: /*
1052: * Flush the HTML output buffer.
1053: * If it is inactive, activate it.
1054: */
1055: static void
1056: print_endword(struct html *h)
1057: {
1058: if (h->noindent) {
1059: print_byte(h, ' ');
1060: return;
1061: }
1062:
1063: if ((h->flags & HTML_BUFFER) == 0) {
1064: h->col++;
1065: h->flags |= HTML_BUFFER;
1066: } else if (h->bufcol) {
1067: putchar(' ');
1068: fwrite(h->buf, h->bufcol, 1, stdout);
1069: h->col += h->bufcol + 1;
1070: }
1071: h->bufcol = 0;
1.196 schwarze 1072: }
1073:
1074: /*
1075: * If at the beginning of a new output line,
1076: * perform indentation and mark the line as containing output.
1077: * Make sure to really produce some output right afterwards,
1078: * but do not use print_otag() for producing it.
1079: */
1080: static void
1.197 schwarze 1081: print_indent(struct html *h)
1.196 schwarze 1082: {
1.197 schwarze 1083: size_t i;
1.196 schwarze 1084:
1.261 schwarze 1085: if (h->col || h->noindent)
1.196 schwarze 1086: return;
1087:
1.261 schwarze 1088: h->col = h->indent * 2;
1089: for (i = 0; i < h->col; i++)
1090: putchar(' ');
1.197 schwarze 1091: }
1092:
/*
 * Hand a NUL-terminated string to print_byte() one byte at a time,
 * such that it is printed or buffered depending on the current
 * state of the HTML output buffer.
 */
static void
print_word(struct html *h, const char *cp)
{
	const char	*s;

	for (s = cp; *s != '\0'; s++)
		print_byte(h, *s);
}
CVSweb