=================================================================== RCS file: /cvs/mandoc/html.c,v retrieving revision 1.1 retrieving revision 1.54 diff -u -p -r1.1 -r1.54 --- mandoc/html.c 2008/12/03 14:39:59 1.1 +++ mandoc/html.c 2009/10/03 15:26:26 1.54 @@ -1,51 +1,489 @@ -/* $Id: html.c,v 1.1 2008/12/03 14:39:59 kristaps Exp $ */ +/* $Id: html.c,v 1.54 2009/10/03 15:26:26 kristaps Exp $ */ /* - * Copyright (c) 2008 Kristaps Dzonsons + * Copyright (c) 2008, 2009 Kristaps Dzonsons * * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all - * copies. + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL - * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE - * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL - * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR - * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER - * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - * PERFORMANCE OF THIS SOFTWARE. + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#include +#include + #include +#include +#include #include +#include +#include -#include "libmdocml.h" -#include "private.h" +#include "chars.h" +#include "html.h" +#define DOCTYPE "-//W3C//DTD HTML 4.01//EN" +#define DTD "http://www.w3.org/TR/html4/strict.dtd" -/* ARGSUSED */ -int -md_line_html(void *data, char *buf) +struct htmldata { + char *name; + int flags; +#define HTML_CLRLINE (1 << 0) +#define HTML_NOSTACK (1 << 1) +}; + +static const struct htmldata htmltags[TAG_MAX] = { + {"html", HTML_CLRLINE}, /* TAG_HTML */ + {"head", HTML_CLRLINE}, /* TAG_HEAD */ + {"body", HTML_CLRLINE}, /* TAG_BODY */ + {"meta", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_META */ + {"title", HTML_CLRLINE}, /* TAG_TITLE */ + {"div", HTML_CLRLINE}, /* TAG_DIV */ + {"h1", 0}, /* TAG_H1 */ + {"h2", 0}, /* TAG_H2 */ + {"p", HTML_CLRLINE}, /* TAG_P */ + {"span", 0}, /* TAG_SPAN */ + {"link", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */ + {"br", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */ + {"a", 0}, /* TAG_A */ + {"table", HTML_CLRLINE}, /* TAG_TABLE */ + {"col", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_COL */ + {"tr", HTML_CLRLINE}, /* TAG_TR */ + {"td", HTML_CLRLINE}, /* TAG_TD */ + {"li", HTML_CLRLINE}, /* TAG_LI */ + {"ul", HTML_CLRLINE}, /* TAG_UL */ + {"ol", HTML_CLRLINE}, /* TAG_OL */ + {"base", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_BASE */ +}; + +static const char *const htmlattrs[ATTR_MAX] = { + "http-equiv", + "content", + "name", + "rel", + "href", + "type", + "media", + "class", + "style", + "width", + "valign", + "target", +}; + +#ifdef __linux__ +extern int getsubopt(char **, char * const *, char **); +#endif + +void * +html_alloc(char *outopts) { + struct html *h; + char *toks[4], *v; - return(1); + toks[0] = "style"; + toks[1] = "man"; + toks[2] = "includes"; + toks[3] = NULL; + + if (NULL == (h = calloc(1, sizeof(struct html)))) + return(NULL); + + SLIST_INIT(&h->tags); + SLIST_INIT(&h->ords); + + if (NULL == (h->symtab = chars_init(CHARS_HTML))) { + free(h); + return(NULL); + } + + while (outopts && *outopts) + switch (getsubopt(&outopts, toks, &v)) { + case (0): + h->style = v; + break; + case (1): + h->base_man = v; + break; + case (2): + h->base_includes = v; + break; + default: + break; + } + + return(h); } -/* ARGSUSED */ -int -md_exit_html(void *data, int flush) +void +html_free(void *p) { + struct tag *tag; + struct ord *ord; + struct html *h; - return(1); + h = (struct html *)p; + + while ( ! SLIST_EMPTY(&h->ords)) { + ord = SLIST_FIRST(&h->ords); + SLIST_REMOVE_HEAD(&h->ords, entry); + free(ord); + } + + while ( ! SLIST_EMPTY(&h->tags)) { + tag = SLIST_FIRST(&h->tags); + SLIST_REMOVE_HEAD(&h->tags, entry); + free(tag); + } + + if (h->buf) + free(h->buf); + if (h->symtab) + chars_free(h->symtab); + + free(h); } +void +print_gen_head(struct html *h) +{ + struct htmlpair tag[4]; + + tag[0].key = ATTR_HTTPEQUIV; + tag[0].val = "Content-Type"; + tag[1].key = ATTR_CONTENT; + tag[1].val = "text/html; charset=utf-8"; + print_otag(h, TAG_META, 2, tag); + + tag[0].key = ATTR_NAME; + tag[0].val = "resource-type"; + tag[1].key = ATTR_CONTENT; + tag[1].val = "document"; + print_otag(h, TAG_META, 2, tag); + + if (h->style) { + tag[0].key = ATTR_REL; + tag[0].val = "stylesheet"; + tag[1].key = ATTR_HREF; + tag[1].val = h->style; + tag[2].key = ATTR_TYPE; + tag[2].val = "text/css"; + tag[3].key = ATTR_MEDIA; + tag[3].val = "all"; + print_otag(h, TAG_LINK, 4, tag); + } +} + + +static void +print_spec(struct html *h, const char *p, int len) +{ + const char *rhs; + int i; + size_t sz; + + rhs = chars_a2ascii(h->symtab, p, (size_t)len, &sz); + + if (NULL == rhs) + return; + for (i = 0; i < (int)sz; i++) + putchar(rhs[i]); +} + + +static void +print_res(struct html *h, const char *p, int len) +{ + const char *rhs; + int i; + size_t sz; + + rhs = chars_a2res(h->symtab, p, (size_t)len, &sz); + + if (NULL == rhs) + return; + for (i = 0; i < (int)sz; i++) + putchar(rhs[i]); +} + + +static void +print_escape(struct html *h, const char **p) +{ + int j, type; + const char *wp; + + wp = *p; + type = 1; + + if (0 == *(++wp)) { + *p = wp; + return; + } + + if ('(' == *wp) { + wp++; + if (0 == *wp || 0 == *(wp + 1)) { + *p = 0 == *wp ? wp : wp + 1; + return; + } + + print_spec(h, wp, 2); + *p = ++wp; + return; + + } else if ('*' == *wp) { + if (0 == *(++wp)) { + *p = wp; + return; + } + + switch (*wp) { + case ('('): + wp++; + if (0 == *wp || 0 == *(wp + 1)) { + *p = 0 == *wp ? wp : wp + 1; + return; + } + + print_res(h, wp, 2); + *p = ++wp; + return; + case ('['): + type = 0; + break; + default: + print_res(h, wp, 1); + *p = wp; + return; + } + + } else if ('f' == *wp) { + if (0 == *(++wp)) { + *p = wp; + return; + } + + switch (*wp) { + case ('B'): + /* TODO */ + break; + case ('I'): + /* TODO */ + break; + case ('P'): + /* FALLTHROUGH */ + case ('R'): + /* TODO */ + break; + default: + break; + } + + *p = wp; + return; + + } else if ('[' != *wp) { + print_spec(h, wp, 1); + *p = wp; + return; + } + + wp++; + for (j = 0; *wp && ']' != *wp; wp++, j++) + /* Loop... */ ; + + if (0 == *wp) { + *p = wp; + return; + } + + if (type) + print_spec(h, wp - j, j); + else + print_res(h, wp - j, j); + + *p = wp; +} + + +static void +print_encode(struct html *h, const char *p) +{ + + for (; *p; p++) { + if ('\\' == *p) { + print_escape(h, &p); + continue; + } + switch (*p) { + case ('<'): + printf("<"); + break; + case ('>'): + printf(">"); + break; + case ('&'): + printf("&"); + break; + default: + putchar(*p); + break; + } + } +} + + +struct tag * +print_otag(struct html *h, enum htmltag tag, + int sz, const struct htmlpair *p) +{ + int i; + struct tag *t; + + if ( ! (HTML_NOSTACK & htmltags[tag].flags)) { + if (NULL == (t = malloc(sizeof(struct tag)))) + err(EXIT_FAILURE, "malloc"); + t->tag = tag; + SLIST_INSERT_HEAD(&h->tags, t, entry); + } else + t = NULL; + + if ( ! (HTML_NOSPACE & h->flags)) + if ( ! (HTML_CLRLINE & htmltags[tag].flags)) + printf(" "); + + printf("<%s", htmltags[tag].name); + for (i = 0; i < sz; i++) { + printf(" %s=\"", htmlattrs[p[i].key]); + assert(p->val); + print_encode(h, p[i].val); + printf("\""); + } + printf(">"); + + h->flags |= HTML_NOSPACE; + if (HTML_CLRLINE & htmltags[tag].flags) + h->flags |= HTML_NEWLINE; + else + h->flags &= ~HTML_NEWLINE; + + return(t); +} + + /* ARGSUSED */ -void * -md_init_html(const struct md_args *args, - struct md_mbuf *mbuf, const struct md_rbuf *rbuf) +static void +print_ctag(struct html *h, enum htmltag tag) { + + printf("", htmltags[tag].name); + if (HTML_CLRLINE & htmltags[tag].flags) + h->flags |= HTML_NOSPACE; + if (HTML_CLRLINE & htmltags[tag].flags) + h->flags |= HTML_NEWLINE; + else + h->flags &= ~HTML_NEWLINE; +} - return(NULL); + +/* ARGSUSED */ +void +print_gen_doctype(struct html *h) +{ + + printf("", DOCTYPE, DTD); +} + + +void +print_text(struct html *h, const char *p) +{ + + if (*p && 0 == *(p + 1)) + switch (*p) { + case('.'): + /* FALLTHROUGH */ + case(','): + /* FALLTHROUGH */ + case(';'): + /* FALLTHROUGH */ + case(':'): + /* FALLTHROUGH */ + case('?'): + /* FALLTHROUGH */ + case('!'): + /* FALLTHROUGH */ + case(')'): + /* FALLTHROUGH */ + case(']'): + /* FALLTHROUGH */ + case('}'): + if ( ! (HTML_IGNDELIM & h->flags)) + h->flags |= HTML_NOSPACE; + break; + default: + break; + } + + if ( ! (h->flags & HTML_NOSPACE)) + printf(" "); + + h->flags &= ~HTML_NOSPACE; + h->flags &= ~HTML_NEWLINE; + + if (p) + print_encode(h, p); + + if (*p && 0 == *(p + 1)) + switch (*p) { + case('('): + /* FALLTHROUGH */ + case('['): + /* FALLTHROUGH */ + case('{'): + h->flags |= HTML_NOSPACE; + break; + default: + break; + } +} + + +void +print_tagq(struct html *h, const struct tag *until) +{ + struct tag *tag; + + while ( ! SLIST_EMPTY(&h->tags)) { + tag = SLIST_FIRST(&h->tags); + print_ctag(h, tag->tag); + SLIST_REMOVE_HEAD(&h->tags, entry); + free(tag); + if (until && tag == until) + return; + } +} + + +void +print_stagq(struct html *h, const struct tag *suntil) +{ + struct tag *tag; + + while ( ! SLIST_EMPTY(&h->tags)) { + tag = SLIST_FIRST(&h->tags); + if (suntil && tag == suntil) + return; + print_ctag(h, tag->tag); + SLIST_REMOVE_HEAD(&h->tags, entry); + free(tag); + } }