version 1.256, 2019/08/02 17:06:04 |
version 1.270, 2020/04/20 13:07:24 |
|
|
/* $Id$ */ |
/* $Id$ */ |
/* |
/* |
|
* Copyright (c) 2011-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org> |
* Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> |
* Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> |
* Copyright (c) 2011-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org> |
|
* |
* |
* Permission to use, copy, modify, and distribute this software for any |
* Permission to use, copy, modify, and distribute this software for any |
* purpose with or without fee is hereby granted, provided that the above |
* purpose with or without fee is hereby granted, provided that the above |
|
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
|
* |
|
* Common functions for mandoc(1) HTML formatters. |
|
* For use by individual formatters and by the main program. |
*/ |
*/ |
#include "config.h" |
#include "config.h" |
|
|
|
|
struct htmldata { |
struct htmldata { |
const char *name; |
const char *name; |
int flags; |
int flags; |
#define HTML_NOSTACK (1 << 0) |
#define HTML_INPHRASE (1 << 0) /* Can appear in phrasing context. */ |
#define HTML_AUTOCLOSE (1 << 1) |
#define HTML_TOPHRASE (1 << 1) /* Establishes phrasing context. */ |
#define HTML_NLBEFORE (1 << 2) |
#define HTML_NOSTACK (1 << 2) /* Does not have an end tag. */ |
#define HTML_NLBEGIN (1 << 3) |
#define HTML_NLBEFORE (1 << 3) /* Output line break before opening. */ |
#define HTML_NLEND (1 << 4) |
#define HTML_NLBEGIN (1 << 4) /* Output line break after opening. */ |
#define HTML_NLAFTER (1 << 5) |
#define HTML_NLEND (1 << 5) /* Output line break before closing. */ |
|
#define HTML_NLAFTER (1 << 6) /* Output line break after closing. */ |
#define HTML_NLAROUND (HTML_NLBEFORE | HTML_NLAFTER) |
#define HTML_NLAROUND (HTML_NLBEFORE | HTML_NLAFTER) |
#define HTML_NLINSIDE (HTML_NLBEGIN | HTML_NLEND) |
#define HTML_NLINSIDE (HTML_NLBEGIN | HTML_NLEND) |
#define HTML_NLALL (HTML_NLAROUND | HTML_NLINSIDE) |
#define HTML_NLALL (HTML_NLAROUND | HTML_NLINSIDE) |
#define HTML_INDENT (1 << 6) |
#define HTML_INDENT (1 << 7) /* Indent content by two spaces. */ |
#define HTML_NOINDENT (1 << 7) |
#define HTML_NOINDENT (1 << 8) /* Exception: never indent content. */ |
}; |
}; |
|
|
static const struct htmldata htmltags[TAG_MAX] = { |
static const struct htmldata htmltags[TAG_MAX] = { |
{"html", HTML_NLALL}, |
{"html", HTML_NLALL}, |
{"head", HTML_NLALL | HTML_INDENT}, |
{"head", HTML_NLALL | HTML_INDENT}, |
{"body", HTML_NLALL}, |
{"meta", HTML_NOSTACK | HTML_NLALL}, |
{"meta", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL}, |
{"link", HTML_NOSTACK | HTML_NLALL}, |
|
{"style", HTML_NLALL | HTML_INDENT}, |
{"title", HTML_NLAROUND}, |
{"title", HTML_NLAROUND}, |
|
{"body", HTML_NLALL}, |
{"div", HTML_NLAROUND}, |
{"div", HTML_NLAROUND}, |
{"div", 0}, |
|
{"section", HTML_NLALL}, |
{"section", HTML_NLALL}, |
{"h1", HTML_NLAROUND}, |
|
{"h2", HTML_NLAROUND}, |
|
{"span", 0}, |
|
{"link", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL}, |
|
{"br", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL}, |
|
{"a", 0}, |
|
{"table", HTML_NLALL | HTML_INDENT}, |
{"table", HTML_NLALL | HTML_INDENT}, |
{"tr", HTML_NLALL | HTML_INDENT}, |
{"tr", HTML_NLALL | HTML_INDENT}, |
{"td", HTML_NLAROUND}, |
{"td", HTML_NLAROUND}, |
Line 79 static const struct htmldata htmltags[TAG_MAX] = { |
|
Line 78 static const struct htmldata htmltags[TAG_MAX] = { |
|
{"dl", HTML_NLALL | HTML_INDENT}, |
{"dl", HTML_NLALL | HTML_INDENT}, |
{"dt", HTML_NLAROUND}, |
{"dt", HTML_NLAROUND}, |
{"dd", HTML_NLAROUND | HTML_INDENT}, |
{"dd", HTML_NLAROUND | HTML_INDENT}, |
{"p", HTML_NLAROUND | HTML_INDENT}, |
{"h1", HTML_TOPHRASE | HTML_NLAROUND}, |
{"pre", HTML_NLALL | HTML_NOINDENT}, |
{"h2", HTML_TOPHRASE | HTML_NLAROUND}, |
{"var", 0}, |
{"p", HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT}, |
{"cite", 0}, |
{"pre", HTML_TOPHRASE | HTML_NLALL | HTML_NOINDENT}, |
{"b", 0}, |
{"a", HTML_INPHRASE | HTML_TOPHRASE}, |
{"i", 0}, |
{"b", HTML_INPHRASE | HTML_TOPHRASE}, |
{"code", 0}, |
{"cite", HTML_INPHRASE | HTML_TOPHRASE}, |
{"small", 0}, |
{"code", HTML_INPHRASE | HTML_TOPHRASE}, |
{"style", HTML_NLALL | HTML_INDENT}, |
{"i", HTML_INPHRASE | HTML_TOPHRASE}, |
{"math", HTML_NLALL | HTML_INDENT}, |
{"small", HTML_INPHRASE | HTML_TOPHRASE}, |
|
{"span", HTML_INPHRASE | HTML_TOPHRASE}, |
|
{"var", HTML_INPHRASE | HTML_TOPHRASE}, |
|
{"br", HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL}, |
|
{"mark", HTML_INPHRASE }, |
|
{"math", HTML_INPHRASE | HTML_NLALL | HTML_INDENT}, |
{"mrow", 0}, |
{"mrow", 0}, |
{"mi", 0}, |
{"mi", 0}, |
{"mn", 0}, |
{"mn", 0}, |
Line 108 static const struct htmldata htmltags[TAG_MAX] = { |
|
Line 112 static const struct htmldata htmltags[TAG_MAX] = { |
|
}; |
}; |
|
|
/* Avoid duplicate HTML id= attributes. */ |
/* Avoid duplicate HTML id= attributes. */ |
|
|
|
struct id_entry { |
|
int ord; /* Ordinal number of the latest occurrence. */ |
|
char id[]; /* The id= attribute without any ordinal suffix. */ |
|
}; |
static struct ohash id_unique; |
static struct ohash id_unique; |
|
|
static void html_reset_internal(struct html *); |
static void html_reset_internal(struct html *); |
Line 142 html_alloc(const struct manoutput *outopts) |
|
Line 151 html_alloc(const struct manoutput *outopts) |
|
if (outopts->toc) |
if (outopts->toc) |
h->oflags |= HTML_TOC; |
h->oflags |= HTML_TOC; |
|
|
mandoc_ohash_init(&id_unique, 4, 0); |
mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id)); |
|
|
return h; |
return h; |
} |
} |
|
|
html_reset_internal(struct html *h) |
html_reset_internal(struct html *h) |
{ |
{ |
struct tag *tag; |
struct tag *tag; |
char *cp; |
struct id_entry *entry; |
unsigned int slot; |
unsigned int slot; |
|
|
while ((tag = h->tag) != NULL) { |
while ((tag = h->tag) != NULL) { |
h->tag = tag->next; |
h->tag = tag->next; |
free(tag); |
free(tag); |
} |
} |
cp = ohash_first(&id_unique, &slot); |
entry = ohash_first(&id_unique, &slot); |
while (cp != NULL) { |
while (entry != NULL) { |
free(cp); |
free(entry); |
cp = ohash_next(&id_unique, &slot); |
entry = ohash_next(&id_unique, &slot); |
} |
} |
ohash_delete(&id_unique); |
ohash_delete(&id_unique); |
} |
} |
|
|
html_reset(void *p) |
html_reset(void *p) |
{ |
{ |
html_reset_internal(p); |
html_reset_internal(p); |
mandoc_ohash_init(&id_unique, 4, 0); |
mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id)); |
} |
} |
|
|
void |
void |
Line 271 print_metaf(struct html *h) |
|
Line 280 print_metaf(struct html *h) |
|
void |
void |
html_close_paragraph(struct html *h) |
html_close_paragraph(struct html *h) |
{ |
{ |
struct tag *t; |
struct tag *this, *next; |
|
int flags; |
|
|
for (t = h->tag; t != NULL && t->closed == 0; t = t->next) { |
this = h->tag; |
switch(t->tag) { |
for (;;) { |
case TAG_P: |
next = this->next; |
case TAG_PRE: |
flags = htmltags[this->tag].flags; |
print_tagq(h, t); |
if (flags & (HTML_INPHRASE | HTML_TOPHRASE)) |
|
print_ctag(h, this); |
|
if ((flags & HTML_INPHRASE) == 0) |
break; |
break; |
case TAG_A: |
this = next; |
print_tagq(h, t); |
|
continue; |
|
default: |
|
continue; |
|
} |
|
break; |
|
} |
} |
} |
} |
|
|
Line 323 html_fillmode(struct html *h, enum roff_tok want) |
|
Line 329 html_fillmode(struct html *h, enum roff_tok want) |
|
return had; |
return had; |
} |
} |
|
|
|
/* |
|
* Allocate a string to be used for the "id=" attribute of an HTML |
|
* element and/or as a segment identifier for a URI in an <a> element. |
|
* The function may fail and return NULL if the node lacks text data |
|
* to create the attribute from. |
|
* The caller is responsible for free(3)ing the returned string. |
|
* |
|
* If the "unique" argument is non-zero, the "id_unique" ohash table |
|
* is used for de-duplication. If the "unique" argument is 1, |
|
* it is the first time the function is called for this tag and |
|
* location, so if an ordinal suffix is needed, it is incremented. |
|
* If the "unique" argument is 2, it is the second time the function |
|
* is called for this tag and location, so the ordinal suffix |
|
* remains unchanged. |
|
*/ |
char * |
char * |
html_make_id(const struct roff_node *n, int unique) |
html_make_id(const struct roff_node *n, int unique) |
{ |
{ |
const struct roff_node *nch; |
const struct roff_node *nch; |
char *buf, *bufs, *cp; |
struct id_entry *entry; |
|
char *buf, *cp; |
|
size_t len; |
unsigned int slot; |
unsigned int slot; |
int suffix; |
|
|
|
for (nch = n->child; nch != NULL; nch = nch->next) |
if (n->tag != NULL) |
if (nch->type != ROFFT_TEXT) |
buf = mandoc_strdup(n->tag); |
return NULL; |
else { |
|
switch (n->tok) { |
|
case MDOC_Sh: |
|
case MDOC_Ss: |
|
case MDOC_Sx: |
|
case MAN_SH: |
|
case MAN_SS: |
|
for (nch = n->child; nch != NULL; nch = nch->next) |
|
if (nch->type != ROFFT_TEXT) |
|
return NULL; |
|
buf = NULL; |
|
deroff(&buf, n); |
|
if (buf == NULL) |
|
return NULL; |
|
break; |
|
default: |
|
if (n->child == NULL || n->child->type != ROFFT_TEXT) |
|
return NULL; |
|
buf = mandoc_strdup(n->child->string); |
|
break; |
|
} |
|
} |
|
|
buf = NULL; |
|
deroff(&buf, n); |
|
if (buf == NULL) |
|
return NULL; |
|
|
|
/* |
/* |
* In ID attributes, only use ASCII characters that are |
* In ID attributes, only use ASCII characters that are |
* permitted in URL-fragment strings according to the |
* permitted in URL-fragment strings according to the |
* explicit list at: |
* explicit list at: |
* https://url.spec.whatwg.org/#url-fragment-string |
* https://url.spec.whatwg.org/#url-fragment-string |
|
* In addition, reserve '~' for ordinal suffixes. |
*/ |
*/ |
|
|
for (cp = buf; *cp != '\0'; cp++) |
for (cp = buf; *cp != '\0'; cp++) |
if (isalnum((unsigned char)*cp) == 0 && |
if (isalnum((unsigned char)*cp) == 0 && |
strchr("!$&'()*+,-./:;=?@_~", *cp) == NULL) |
strchr("!$&'()*+,-./:;=?@_", *cp) == NULL) |
*cp = '_'; |
*cp = '_'; |
|
|
if (unique == 0) |
if (unique == 0) |
Line 357 html_make_id(const struct roff_node *n, int unique) |
|
Line 396 html_make_id(const struct roff_node *n, int unique) |
|
|
|
/* Avoid duplicate HTML id= attributes. */ |
/* Avoid duplicate HTML id= attributes. */ |
|
|
bufs = NULL; |
|
suffix = 1; |
|
slot = ohash_qlookup(&id_unique, buf); |
slot = ohash_qlookup(&id_unique, buf); |
cp = ohash_find(&id_unique, slot); |
if ((entry = ohash_find(&id_unique, slot)) == NULL) { |
if (cp != NULL) { |
len = strlen(buf) + 1; |
while (cp != NULL) { |
entry = mandoc_malloc(sizeof(*entry) + len); |
free(bufs); |
entry->ord = 1; |
if (++suffix > 127) { |
memcpy(entry->id, buf, len); |
free(buf); |
ohash_insert(&id_unique, slot, entry); |
return NULL; |
} else if (unique == 1) |
} |
entry->ord++; |
mandoc_asprintf(&bufs, "%s_%d", buf, suffix); |
|
slot = ohash_qlookup(&id_unique, bufs); |
if (entry->ord > 1) { |
cp = ohash_find(&id_unique, slot); |
cp = buf; |
} |
mandoc_asprintf(&buf, "%s~%d", cp, entry->ord); |
free(buf); |
free(cp); |
buf = bufs; |
|
} |
} |
ohash_insert(&id_unique, slot, buf); |
|
return buf; |
return buf; |
} |
} |
|
|
Line 584 print_otag(struct html *h, enum htmltag tag, const cha |
|
Line 619 print_otag(struct html *h, enum htmltag tag, const cha |
|
|
|
tflags = htmltags[tag].flags; |
tflags = htmltags[tag].flags; |
|
|
|
/* Flow content is not allowed in phrasing context. */ |
|
|
|
if ((tflags & HTML_INPHRASE) == 0) { |
|
for (t = h->tag; t != NULL; t = t->next) { |
|
if (t->closed) |
|
continue; |
|
assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0); |
|
break; |
|
} |
|
|
|
/* |
|
* Always wrap phrasing elements in a paragraph |
|
* unless already contained in some flow container; |
|
* never put them directly into a section. |
|
*/ |
|
|
|
} else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION) |
|
print_otag(h, TAG_P, "c", "Pp"); |
|
|
/* Push this tag onto the stack of open scopes. */ |
/* Push this tag onto the stack of open scopes. */ |
|
|
if ((tflags & HTML_NOSTACK) == 0) { |
if ((tflags & HTML_NOSTACK) == 0) { |
Line 701 print_otag(struct html *h, enum htmltag tag, const cha |
|
Line 755 print_otag(struct html *h, enum htmltag tag, const cha |
|
|
|
/* Accommodate for "well-formed" singleton escaping. */ |
/* Accommodate for "well-formed" singleton escaping. */ |
|
|
if (HTML_AUTOCLOSE & htmltags[tag].flags) |
if (htmltags[tag].flags & HTML_NOSTACK) |
print_byte(h, '/'); |
print_byte(h, '/'); |
|
|
print_byte(h, '>'); |
print_byte(h, '>'); |
Line 719 print_otag(struct html *h, enum htmltag tag, const cha |
|
Line 773 print_otag(struct html *h, enum htmltag tag, const cha |
|
return t; |
return t; |
} |
} |
|
|
|
/* |
|
* Print an element with an optional "id=" attribute. |
|
* If the element has phrasing content and an "id=" attribute, |
|
* also add a permalink: outside if it can be in phrasing context, |
|
* inside otherwise. |
|
*/ |
|
struct tag * |
|
print_otag_id(struct html *h, enum htmltag elemtype, const char *cattr, |
|
struct roff_node *n) |
|
{ |
|
struct roff_node *nch; |
|
struct tag *ret, *t; |
|
char *id, *href; |
|
|
|
ret = NULL; |
|
id = href = NULL; |
|
if (n->flags & NODE_ID) |
|
id = html_make_id(n, 1); |
|
if (n->flags & NODE_HREF) |
|
href = id == NULL ? html_make_id(n, 2) : id; |
|
if (href != NULL && htmltags[elemtype].flags & HTML_INPHRASE) |
|
ret = print_otag(h, TAG_A, "chR", "permalink", href); |
|
t = print_otag(h, elemtype, "ci", cattr, id); |
|
if (ret == NULL) { |
|
ret = t; |
|
if (href != NULL && (nch = n->child) != NULL) { |
|
/* man(7) is safe, it tags phrasing content only. */ |
|
if (n->tok > MDOC_MAX || |
|
htmltags[elemtype].flags & HTML_TOPHRASE) |
|
nch = NULL; |
|
else /* For mdoc(7), beware of nested blocks. */ |
|
while (nch != NULL && nch->type == ROFFT_TEXT) |
|
nch = nch->next; |
|
if (nch == NULL) |
|
print_otag(h, TAG_A, "chR", "permalink", href); |
|
} |
|
} |
|
free(id); |
|
if (id == NULL) |
|
free(href); |
|
return ret; |
|
} |
|
|
static void |
static void |
print_ctag(struct html *h, struct tag *tag) |
print_ctag(struct html *h, struct tag *tag) |
{ |
{ |
Line 788 print_gen_comment(struct html *h, struct roff_node *n) |
|
Line 885 print_gen_comment(struct html *h, struct roff_node *n) |
|
void |
void |
print_text(struct html *h, const char *word) |
print_text(struct html *h, const char *word) |
{ |
{ |
|
print_tagged_text(h, word, NULL); |
|
} |
|
|
|
void |
|
print_tagged_text(struct html *h, const char *word, struct roff_node *n) |
|
{ |
|
struct tag *t; |
|
char *href; |
|
|
|
/* |
|
* Always wrap text in a paragraph unless already contained in |
|
* some flow container; never put it directly into a section. |
|
*/ |
|
|
|
if (h->tag->tag == TAG_SECTION) |
|
print_otag(h, TAG_P, "c", "Pp"); |
|
|
|
/* Output whitespace before this text? */ |
|
|
if (h->col && (h->flags & HTML_NOSPACE) == 0) { |
if (h->col && (h->flags & HTML_NOSPACE) == 0) { |
if ( ! (HTML_KEEP & h->flags)) { |
if ( ! (HTML_KEEP & h->flags)) { |
if (HTML_PREKEEP & h->flags) |
if (HTML_PREKEEP & h->flags) |
Line 797 print_text(struct html *h, const char *word) |
|
Line 913 print_text(struct html *h, const char *word) |
|
print_word(h, " "); |
print_word(h, " "); |
} |
} |
|
|
|
/* |
|
* Optionally switch fonts, optionally write a permalink, then |
|
* print the text, optionally surrounded by HTML whitespace. |
|
*/ |
|
|
assert(h->metaf == NULL); |
assert(h->metaf == NULL); |
print_metaf(h); |
print_metaf(h); |
print_indent(h); |
print_indent(h); |
|
|
|
if (n != NULL && (href = html_make_id(n, 2)) != NULL) { |
|
t = print_otag(h, TAG_A, "chR", "permalink", href); |
|
free(href); |
|
} else |
|
t = NULL; |
|
|
if ( ! print_encode(h, word, NULL, 0)) { |
if ( ! print_encode(h, word, NULL, 0)) { |
if ( ! (h->flags & HTML_NONOSPACE)) |
if ( ! (h->flags & HTML_NONOSPACE)) |
h->flags &= ~HTML_NOSPACE; |
h->flags &= ~HTML_NOSPACE; |
Line 810 print_text(struct html *h, const char *word) |
|
Line 938 print_text(struct html *h, const char *word) |
|
if (h->metaf != NULL) { |
if (h->metaf != NULL) { |
print_tagq(h, h->metaf); |
print_tagq(h, h->metaf); |
h->metaf = NULL; |
h->metaf = NULL; |
} |
} else if (t != NULL) |
|
print_tagq(h, t); |
|
|
h->flags &= ~HTML_IGNDELIM; |
h->flags &= ~HTML_IGNDELIM; |
} |
} |
Line 937 print_indent(struct html *h) |
|
Line 1066 print_indent(struct html *h) |
|
{ |
{ |
size_t i; |
size_t i; |
|
|
if (h->col) |
if (h->col || h->noindent) |
return; |
return; |
|
|
if (h->noindent == 0) { |
h->col = h->indent * 2; |
h->col = h->indent * 2; |
for (i = 0; i < h->col; i++) |
for (i = 0; i < h->col; i++) |
putchar(' '); |
putchar(' '); |
|
} |
|
h->flags &= ~HTML_NOSPACE; |
|
} |
} |
|
|
/* |
/* |