version 1.82, 2009/11/09 05:11:46 |
version 1.109, 2010/07/23 00:08:57 |
|
|
/* $Id$ */ |
/* $Id$ */ |
/* |
/* |
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> |
* Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> |
* |
* |
* Permission to use, copy, modify, and distribute this software for any |
* Permission to use, copy, modify, and distribute this software for any |
* purpose with or without fee is hereby granted, provided that the above |
* purpose with or without fee is hereby granted, provided that the above |
|
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
*/ |
*/ |
|
#ifdef HAVE_CONFIG_H |
|
#include "config.h" |
|
#endif |
|
|
#include <sys/types.h> |
#include <sys/types.h> |
|
|
#include <assert.h> |
#include <assert.h> |
|
|
#include <string.h> |
#include <string.h> |
#include <unistd.h> |
#include <unistd.h> |
|
|
|
#include "mandoc.h" |
#include "out.h" |
#include "out.h" |
#include "chars.h" |
#include "chars.h" |
#include "html.h" |
#include "html.h" |
#include "main.h" |
#include "main.h" |
|
|
#define UNCONST(a) ((void *)(uintptr_t)(const void *)(a)) |
|
|
|
#define DOCTYPE "-//W3C//DTD HTML 4.01//EN" |
|
#define DTD "http://www.w3.org/TR/html4/strict.dtd" |
|
|
|
struct htmldata { |
struct htmldata { |
const char *name; |
const char *name; |
int flags; |
int flags; |
#define HTML_CLRLINE (1 << 0) |
#define HTML_CLRLINE (1 << 0) |
#define HTML_NOSTACK (1 << 1) |
#define HTML_NOSTACK (1 << 1) |
|
#define HTML_AUTOCLOSE (1 << 2) /* Tag has auto-closure. */ |
}; |
}; |
|
|
static const struct htmldata htmltags[TAG_MAX] = { |
static const struct htmldata htmltags[TAG_MAX] = { |
{"html", HTML_CLRLINE}, /* TAG_HTML */ |
{"html", HTML_CLRLINE}, /* TAG_HTML */ |
{"head", HTML_CLRLINE}, /* TAG_HEAD */ |
{"head", HTML_CLRLINE}, /* TAG_HEAD */ |
{"body", HTML_CLRLINE}, /* TAG_BODY */ |
{"body", HTML_CLRLINE}, /* TAG_BODY */ |
{"meta", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_META */ |
{"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */ |
{"title", HTML_CLRLINE}, /* TAG_TITLE */ |
{"title", HTML_CLRLINE}, /* TAG_TITLE */ |
{"div", HTML_CLRLINE}, /* TAG_DIV */ |
{"div", HTML_CLRLINE}, /* TAG_DIV */ |
{"h1", 0}, /* TAG_H1 */ |
{"h1", 0}, /* TAG_H1 */ |
{"h2", 0}, /* TAG_H2 */ |
{"h2", 0}, /* TAG_H2 */ |
{"p", HTML_CLRLINE}, /* TAG_P */ |
|
{"span", 0}, /* TAG_SPAN */ |
{"span", 0}, /* TAG_SPAN */ |
{"link", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */ |
{"link", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */ |
{"br", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */ |
{"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */ |
{"a", 0}, /* TAG_A */ |
{"a", 0}, /* TAG_A */ |
{"table", HTML_CLRLINE}, /* TAG_TABLE */ |
{"table", HTML_CLRLINE}, /* TAG_TABLE */ |
{"col", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_COL */ |
{"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */ |
{"tr", HTML_CLRLINE}, /* TAG_TR */ |
{"tr", HTML_CLRLINE}, /* TAG_TR */ |
{"td", HTML_CLRLINE}, /* TAG_TD */ |
{"td", HTML_CLRLINE}, /* TAG_TD */ |
{"li", HTML_CLRLINE}, /* TAG_LI */ |
{"li", HTML_CLRLINE}, /* TAG_LI */ |
{"ul", HTML_CLRLINE}, /* TAG_UL */ |
{"ul", HTML_CLRLINE}, /* TAG_UL */ |
{"ol", HTML_CLRLINE}, /* TAG_OL */ |
{"ol", HTML_CLRLINE}, /* TAG_OL */ |
{"base", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_BASE */ |
|
}; |
}; |
|
|
|
static const char *const htmlfonts[HTMLFONT_MAX] = { |
|
"roman", |
|
"bold", |
|
"italic" |
|
}; |
|
|
static const char *const htmlattrs[ATTR_MAX] = { |
static const char *const htmlattrs[ATTR_MAX] = { |
"http-equiv", |
"http-equiv", |
"content", |
"content", |
Line 83 static const char *const htmlattrs[ATTR_MAX] = { |
|
Line 88 static const char *const htmlattrs[ATTR_MAX] = { |
|
"summary", |
"summary", |
}; |
}; |
|
|
#ifdef __linux__ |
static void print_spec(struct html *, enum roffdeco, |
extern int getsubopt(char **, char * const *, char **); |
const char *, size_t); |
#endif |
static void print_res(struct html *, const char *, size_t); |
|
|
|
|
static void print_spec(struct html *, const char *, int); |
|
static void print_res(struct html *, const char *, int); |
|
static void print_ctag(struct html *, enum htmltag); |
static void print_ctag(struct html *, enum htmltag); |
static void print_encode(struct html *, const char *); |
static void print_doctype(struct html *); |
|
static void print_xmltype(struct html *); |
|
static int print_encode(struct html *, const char *, int); |
|
static void print_metaf(struct html *, enum roffdeco); |
|
static void print_attr(struct html *, |
|
const char *, const char *); |
|
static void *ml_alloc(char *, enum htmltype); |
|
|
|
|
void * |
static void * |
html_alloc(char *outopts) |
ml_alloc(char *outopts, enum htmltype type) |
{ |
{ |
struct html *h; |
struct html *h; |
const char *toks[4]; |
const char *toks[4]; |
Line 112 html_alloc(char *outopts) |
|
Line 119 html_alloc(char *outopts) |
|
exit(EXIT_FAILURE); |
exit(EXIT_FAILURE); |
} |
} |
|
|
|
h->type = type; |
h->tags.head = NULL; |
h->tags.head = NULL; |
h->ords.head = NULL; |
h->ords.head = NULL; |
h->symtab = chars_init(CHARS_HTML); |
h->symtab = chars_init(CHARS_HTML); |
Line 134 html_alloc(char *outopts) |
|
Line 142 html_alloc(char *outopts) |
|
return(h); |
return(h); |
} |
} |
|
|
|
void * |
|
html_alloc(char *outopts) |
|
{ |
|
|
|
return(ml_alloc(outopts, HTML_HTML_4_01_STRICT)); |
|
} |
|
|
|
|
|
void * |
|
xhtml_alloc(char *outopts) |
|
{ |
|
|
|
return(ml_alloc(outopts, HTML_XHTML_1_0_STRICT)); |
|
} |
|
|
|
|
void |
void |
html_free(void *p) |
html_free(void *p) |
{ |
{ |
Line 193 print_gen_head(struct html *h) |
|
Line 216 print_gen_head(struct html *h) |
|
|
|
|
|
static void |
static void |
print_spec(struct html *h, const char *p, int len) |
print_spec(struct html *h, enum roffdeco d, const char *p, size_t len) |
{ |
{ |
|
int cp; |
const char *rhs; |
const char *rhs; |
size_t sz; |
size_t sz; |
|
|
rhs = chars_a2ascii(h->symtab, p, (size_t)len, &sz); |
if ((cp = chars_spec2cp(h->symtab, p, len)) > 0) { |
|
printf("&#%d;", cp); |
if (NULL == rhs) |
|
return; |
return; |
fwrite(rhs, 1, sz, stdout); |
} else if (-1 == cp && DECO_SSPECIAL == d) { |
|
fwrite(p, 1, len, stdout); |
|
return; |
|
} else if (-1 == cp) |
|
return; |
|
|
|
if (NULL != (rhs = chars_spec2str(h->symtab, p, len, &sz))) |
|
fwrite(rhs, 1, sz, stdout); |
} |
} |
|
|
|
|
static void |
static void |
print_res(struct html *h, const char *p, int len) |
print_res(struct html *h, const char *p, size_t len) |
{ |
{ |
|
int cp; |
const char *rhs; |
const char *rhs; |
size_t sz; |
size_t sz; |
|
|
rhs = chars_a2res(h->symtab, p, (size_t)len, &sz); |
if ((cp = chars_res2cp(h->symtab, p, len)) > 0) { |
|
printf("&#%d;", cp); |
if (NULL == rhs) |
|
return; |
return; |
fwrite(rhs, 1, sz, stdout); |
} else if (-1 == cp) |
|
return; |
|
|
|
if (NULL != (rhs = chars_res2str(h->symtab, p, len, &sz))) |
|
fwrite(rhs, 1, sz, stdout); |
} |
} |
|
|
|
|
|
struct tag * |
|
print_ofont(struct html *h, enum htmlfont font) |
|
{ |
|
struct htmlpair tag; |
|
|
|
h->metal = h->metac; |
|
h->metac = font; |
|
|
|
/* FIXME: DECO_ROMAN should just close out preexisting. */ |
|
|
|
if (h->metaf && h->tags.head == h->metaf) |
|
print_tagq(h, h->metaf); |
|
|
|
PAIR_CLASS_INIT(&tag, htmlfonts[font]); |
|
h->metaf = print_otag(h, TAG_SPAN, 1, &tag); |
|
return(h->metaf); |
|
} |
|
|
|
|
static void |
static void |
print_encode(struct html *h, const char *p) |
print_metaf(struct html *h, enum roffdeco deco) |
{ |
{ |
|
enum htmlfont font; |
|
|
|
switch (deco) { |
|
case (DECO_PREVIOUS): |
|
font = h->metal; |
|
break; |
|
case (DECO_ITALIC): |
|
font = HTMLFONT_ITALIC; |
|
break; |
|
case (DECO_BOLD): |
|
font = HTMLFONT_BOLD; |
|
break; |
|
case (DECO_ROMAN): |
|
font = HTMLFONT_NONE; |
|
break; |
|
default: |
|
abort(); |
|
/* NOTREACHED */ |
|
} |
|
|
|
(void)print_ofont(h, font); |
|
} |
|
|
|
|
|
static int |
|
print_encode(struct html *h, const char *p, int norecurse) |
|
{ |
size_t sz; |
size_t sz; |
int len; |
int len, nospace; |
const char *seq; |
const char *seq; |
enum roffdeco deco; |
enum roffdeco deco; |
|
static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' }; |
|
|
|
nospace = 0; |
|
|
for (; *p; p++) { |
for (; *p; p++) { |
sz = strcspn(p, "\\<>&"); |
sz = strcspn(p, rejs); |
|
|
fwrite(p, 1, sz, stdout); |
fwrite(p, 1, sz, stdout); |
p += /* LINTED */ |
p += /* LINTED */ |
Line 244 print_encode(struct html *h, const char *p) |
|
Line 327 print_encode(struct html *h, const char *p) |
|
} else if ('&' == *p) { |
} else if ('&' == *p) { |
printf("&"); |
printf("&"); |
continue; |
continue; |
|
} else if (ASCII_HYPH == *p) { |
|
/* |
|
* Note: "soft hyphens" aren't graphically |
|
* displayed when not breaking the text; we want |
|
* them to be displayed. |
|
*/ |
|
/*printf("­");*/ |
|
putchar('-'); |
|
continue; |
} else if ('\0' == *p) |
} else if ('\0' == *p) |
break; |
break; |
|
|
Line 254 print_encode(struct html *h, const char *p) |
|
Line 346 print_encode(struct html *h, const char *p) |
|
case (DECO_RESERVED): |
case (DECO_RESERVED): |
print_res(h, seq, sz); |
print_res(h, seq, sz); |
break; |
break; |
|
case (DECO_SSPECIAL): |
|
/* FALLTHROUGH */ |
case (DECO_SPECIAL): |
case (DECO_SPECIAL): |
print_spec(h, seq, sz); |
print_spec(h, deco, seq, sz); |
break; |
break; |
|
case (DECO_PREVIOUS): |
|
/* FALLTHROUGH */ |
|
case (DECO_BOLD): |
|
/* FALLTHROUGH */ |
|
case (DECO_ITALIC): |
|
/* FALLTHROUGH */ |
|
case (DECO_ROMAN): |
|
if (norecurse) |
|
break; |
|
print_metaf(h, deco); |
|
break; |
default: |
default: |
break; |
break; |
} |
} |
|
|
p += len - 1; |
p += len - 1; |
|
|
|
if (DECO_NOSPACE == deco && '\0' == *(p + 1)) |
|
nospace = 1; |
} |
} |
|
|
|
return(nospace); |
} |
} |
|
|
|
|
|
static void |
|
print_attr(struct html *h, const char *key, const char *val) |
|
{ |
|
printf(" %s=\"", key); |
|
(void)print_encode(h, val, 1); |
|
putchar('\"'); |
|
} |
|
|
|
|
struct tag * |
struct tag * |
print_otag(struct html *h, enum htmltag tag, |
print_otag(struct html *h, enum htmltag tag, |
int sz, const struct htmlpair *p) |
int sz, const struct htmlpair *p) |
Line 273 print_otag(struct html *h, enum htmltag tag, |
|
Line 392 print_otag(struct html *h, enum htmltag tag, |
|
int i; |
int i; |
struct tag *t; |
struct tag *t; |
|
|
|
/* Push this tag onto the stack of open scopes. */ |
|
|
if ( ! (HTML_NOSTACK & htmltags[tag].flags)) { |
if ( ! (HTML_NOSTACK & htmltags[tag].flags)) { |
t = malloc(sizeof(struct tag)); |
t = malloc(sizeof(struct tag)); |
if (NULL == t) { |
if (NULL == t) { |
Line 286 print_otag(struct html *h, enum htmltag tag, |
|
Line 407 print_otag(struct html *h, enum htmltag tag, |
|
t = NULL; |
t = NULL; |
|
|
if ( ! (HTML_NOSPACE & h->flags)) |
if ( ! (HTML_NOSPACE & h->flags)) |
if ( ! (HTML_CLRLINE & htmltags[tag].flags)) |
if ( ! (HTML_CLRLINE & htmltags[tag].flags)) { |
putchar(' '); |
/* Manage keeps! */ |
|
if ( ! (HTML_KEEP & h->flags)) { |
|
if (HTML_PREKEEP & h->flags) |
|
h->flags |= HTML_KEEP; |
|
putchar(' '); |
|
} else |
|
printf(" "); |
|
} |
|
|
|
if ( ! (h->flags & HTML_NONOSPACE)) |
|
h->flags &= ~HTML_NOSPACE; |
|
|
|
/* Print out the tag name and attributes. */ |
|
|
printf("<%s", htmltags[tag].name); |
printf("<%s", htmltags[tag].name); |
for (i = 0; i < sz; i++) { |
for (i = 0; i < sz; i++) |
printf(" %s=\"", htmlattrs[p[i].key]); |
print_attr(h, htmlattrs[p[i].key], p[i].val); |
assert(p->val); |
|
print_encode(h, p[i].val); |
/* Add non-overridable attributes. */ |
putchar('\"'); |
|
|
if (TAG_HTML == tag && HTML_XHTML_1_0_STRICT == h->type) { |
|
print_attr(h, "xmlns", "http://www.w3.org/1999/xhtml"); |
|
print_attr(h, "xml:lang", "en"); |
|
print_attr(h, "lang", "en"); |
} |
} |
|
|
|
/* Accommodate XML "well-formed" singleton escaping. */ |
|
|
|
if (HTML_AUTOCLOSE & htmltags[tag].flags) |
|
switch (h->type) { |
|
case (HTML_XHTML_1_0_STRICT): |
|
putchar('/'); |
|
break; |
|
default: |
|
break; |
|
} |
|
|
putchar('>'); |
putchar('>'); |
|
|
h->flags |= HTML_NOSPACE; |
h->flags |= HTML_NOSPACE; |
if (HTML_CLRLINE & htmltags[tag].flags) |
|
h->flags |= HTML_NEWLINE; |
|
else |
|
h->flags &= ~HTML_NEWLINE; |
|
|
|
return(t); |
return(t); |
} |
} |
|
|
|
|
/* ARGSUSED */ |
|
static void |
static void |
print_ctag(struct html *h, enum htmltag tag) |
print_ctag(struct html *h, enum htmltag tag) |
{ |
{ |
Line 316 print_ctag(struct html *h, enum htmltag tag) |
|
Line 459 print_ctag(struct html *h, enum htmltag tag) |
|
printf("</%s>", htmltags[tag].name); |
printf("</%s>", htmltags[tag].name); |
if (HTML_CLRLINE & htmltags[tag].flags) { |
if (HTML_CLRLINE & htmltags[tag].flags) { |
h->flags |= HTML_NOSPACE; |
h->flags |= HTML_NOSPACE; |
h->flags |= HTML_NEWLINE; |
|
putchar('\n'); |
putchar('\n'); |
} else |
} |
h->flags &= ~HTML_NEWLINE; |
|
} |
} |
|
|
|
|
/* ARGSUSED */ |
|
void |
void |
print_gen_doctype(struct html *h) |
print_gen_decls(struct html *h) |
{ |
{ |
|
|
printf("<!DOCTYPE HTML PUBLIC \"%s\" \"%s\">", DOCTYPE, DTD); |
print_xmltype(h); |
|
print_doctype(h); |
} |
} |
|
|
|
|
|
static void |
|
print_xmltype(struct html *h) |
|
{ |
|
|
|
if (HTML_XHTML_1_0_STRICT == h->type) |
|
printf("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"); |
|
} |
|
|
|
|
|
static void |
|
print_doctype(struct html *h) |
|
{ |
|
const char *doctype; |
|
const char *dtd; |
|
const char *name; |
|
|
|
switch (h->type) { |
|
case (HTML_HTML_4_01_STRICT): |
|
name = "HTML"; |
|
doctype = "-//W3C//DTD HTML 4.01//EN"; |
|
dtd = "http://www.w3.org/TR/html4/strict.dtd"; |
|
break; |
|
default: |
|
name = "html"; |
|
doctype = "-//W3C//DTD XHTML 1.0 Strict//EN"; |
|
dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"; |
|
break; |
|
} |
|
|
|
printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n", |
|
name, doctype, dtd); |
|
} |
|
|
|
|
void |
void |
print_text(struct html *h, const char *p) |
print_text(struct html *h, const char *word) |
{ |
{ |
|
|
if (*p && 0 == *(p + 1)) |
if (word[0] && '\0' == word[1]) |
switch (*p) { |
switch (word[0]) { |
case('.'): |
case('.'): |
/* FALLTHROUGH */ |
/* FALLTHROUGH */ |
case(','): |
case(','): |
Line 353 print_text(struct html *h, const char *p) |
|
Line 528 print_text(struct html *h, const char *p) |
|
case(')'): |
case(')'): |
/* FALLTHROUGH */ |
/* FALLTHROUGH */ |
case(']'): |
case(']'): |
/* FALLTHROUGH */ |
|
case('}'): |
|
if ( ! (HTML_IGNDELIM & h->flags)) |
if ( ! (HTML_IGNDELIM & h->flags)) |
h->flags |= HTML_NOSPACE; |
h->flags |= HTML_NOSPACE; |
break; |
break; |
Line 362 print_text(struct html *h, const char *p) |
|
Line 535 print_text(struct html *h, const char *p) |
|
break; |
break; |
} |
} |
|
|
if ( ! (h->flags & HTML_NOSPACE)) |
if ( ! (HTML_NOSPACE & h->flags)) { |
putchar(' '); |
/* Manage keeps! */ |
|
if ( ! (HTML_KEEP & h->flags)) { |
|
if (HTML_PREKEEP & h->flags) |
|
h->flags |= HTML_KEEP; |
|
putchar(' '); |
|
} else |
|
printf(" "); |
|
} |
|
|
h->flags &= ~HTML_NOSPACE; |
assert(word); |
h->flags &= ~HTML_NEWLINE; |
if ( ! print_encode(h, word, 0)) |
|
if ( ! (h->flags & HTML_NONOSPACE)) |
|
h->flags &= ~HTML_NOSPACE; |
|
|
if (p) |
/* |
print_encode(h, p); |
* Note that we don't process the pipe: the parser sees it as |
|
* punctuation, but we don't in terms of typography. |
if (*p && 0 == *(p + 1)) |
*/ |
switch (*p) { |
if (word[0] && '\0' == word[1]) |
|
switch (word[0]) { |
case('('): |
case('('): |
/* FALLTHROUGH */ |
/* FALLTHROUGH */ |
case('['): |
case('['): |
/* FALLTHROUGH */ |
|
case('{'): |
|
h->flags |= HTML_NOSPACE; |
h->flags |= HTML_NOSPACE; |
break; |
break; |
default: |
default: |
Line 392 print_tagq(struct html *h, const struct tag *until) |
|
Line 573 print_tagq(struct html *h, const struct tag *until) |
|
struct tag *tag; |
struct tag *tag; |
|
|
while ((tag = h->tags.head) != NULL) { |
while ((tag = h->tags.head) != NULL) { |
|
if (tag == h->metaf) |
|
h->metaf = NULL; |
print_ctag(h, tag->tag); |
print_ctag(h, tag->tag); |
h->tags.head = tag->next; |
h->tags.head = tag->next; |
free(tag); |
free(tag); |
Line 409 print_stagq(struct html *h, const struct tag *suntil) |
|
Line 592 print_stagq(struct html *h, const struct tag *suntil) |
|
while ((tag = h->tags.head) != NULL) { |
while ((tag = h->tags.head) != NULL) { |
if (suntil && tag == suntil) |
if (suntil && tag == suntil) |
return; |
return; |
|
if (tag == h->metaf) |
|
h->metaf = NULL; |
print_ctag(h, tag->tag); |
print_ctag(h, tag->tag); |
h->tags.head = tag->next; |
h->tags.head = tag->next; |
free(tag); |
free(tag); |
Line 565 bufcat_su(struct html *h, const char *p, const struct |
|
Line 750 bufcat_su(struct html *h, const char *p, const struct |
|
break; |
break; |
} |
} |
|
|
if (su->pt) |
/* |
buffmt(h, "%s: %f%s;", p, v, u); |
* XXX: the CSS spec isn't clear as to which types accept |
else |
* integer or real numbers, so we just make them all decimals. |
/* LINTED */ |
*/ |
buffmt(h, "%s: %d%s;", p, (int)v, u); |
buffmt(h, "%s: %.2f%s;", p, v, u); |
} |
} |
|
|
|
|