=================================================================== RCS file: /cvs/mandoc/html.c,v retrieving revision 1.81 retrieving revision 1.87 diff -u -p -r1.81 -r1.87 --- mandoc/html.c 2009/11/05 10:16:01 1.81 +++ mandoc/html.c 2009/11/14 12:04:59 1.87 @@ -1,4 +1,4 @@ -/* $Id: html.c,v 1.81 2009/11/05 10:16:01 kristaps Exp $ */ +/* $Id: html.c,v 1.87 2009/11/14 12:04:59 kristaps Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -66,7 +66,7 @@ static const struct htmldata htmltags[TAG_MAX] = { {"base", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_BASE */ }; -static const char *const htmlattrs[ATTR_MAX] = { +static const char *const htmlattrs[ATTR_MAX] = { "http-equiv", "content", "name", @@ -87,6 +87,13 @@ static const char *const htmlattrs[ATTR_MAX] = { extern int getsubopt(char **, char * const *, char **); #endif + +static void print_spec(struct html *, const char *, size_t); +static void print_res(struct html *, const char *, size_t); +static void print_ctag(struct html *, enum htmltag); +static int print_encode(struct html *, const char *); + + void * html_alloc(char *outopts) { @@ -186,12 +193,12 @@ print_gen_head(struct html *h) static void -print_spec(struct html *h, const char *p, int len) +print_spec(struct html *h, const char *p, size_t len) { const char *rhs; size_t sz; - rhs = chars_a2ascii(h->symtab, p, (size_t)len, &sz); + rhs = chars_a2ascii(h->symtab, p, len, &sz); if (NULL == rhs) return; @@ -200,12 +207,12 @@ print_spec(struct html *h, const char *p, int len) static void -print_res(struct html *h, const char *p, int len) +print_res(struct html *h, const char *p, size_t len) { const char *rhs; size_t sz; - rhs = chars_a2res(h->symtab, p, (size_t)len, &sz); + rhs = chars_a2res(h->symtab, p, len, &sz); if (NULL == rhs) return; @@ -213,109 +220,16 @@ print_res(struct html *h, const char *p, int len) } -static void -print_escape(struct html *h, const char **p) -{ - int j, type; - const char *wp; - - wp = *p; - type = 1; - - if (0 == *(++wp)) 
{ - *p = wp; - return; - } - - if ('(' == *wp) { - wp++; - if (0 == *wp || 0 == *(wp + 1)) { - *p = 0 == *wp ? wp : wp + 1; - return; - } - - print_spec(h, wp, 2); - *p = ++wp; - return; - - } else if ('*' == *wp) { - if (0 == *(++wp)) { - *p = wp; - return; - } - - switch (*wp) { - case ('('): - wp++; - if (0 == *wp || 0 == *(wp + 1)) { - *p = 0 == *wp ? wp : wp + 1; - return; - } - - print_res(h, wp, 2); - *p = ++wp; - return; - case ('['): - type = 0; - break; - default: - print_res(h, wp, 1); - *p = wp; - return; - } - - } else if ('f' == *wp) { - if (0 == *(++wp)) { - *p = wp; - return; - } - - /* - * These aren't supported, as they're symmetry-breaking - * constructs that don't play well with hierarchical - * mark-up. Consider: - * - * \fBHello. - * .PP - * World. - * - * The style started before "Hello" wouldn't be able to - * propogate into the next `PP' because we'd exit the - * current paragraph's scope. - */ - - *p = wp; - return; - - } else if ('[' != *wp) { - print_spec(h, wp, 1); - *p = wp; - return; - } - - wp++; - for (j = 0; *wp && ']' != *wp; wp++, j++) - /* Loop... 
*/ ; - - if (0 == *wp) { - *p = wp; - return; - } - - if (type) - print_spec(h, wp - j, j); - else - print_res(h, wp - j, j); - - *p = wp; -} - - -static void +static int print_encode(struct html *h, const char *p) { size_t sz; + int len, nospace; + const char *seq; + enum roffdeco deco; + nospace = 0; + for (; *p; p++) { sz = strcspn(p, "\\<>&"); @@ -323,19 +237,39 @@ print_encode(struct html *h, const char *p) p += /* LINTED */ sz; - if ('\\' == *p) { - print_escape(h, &p); + if ('<' == *p) { + printf("&lt;"); continue; + } else if ('>' == *p) { + printf("&gt;"); + continue; + } else if ('&' == *p) { + printf("&amp;"); + continue; } else if ('\0' == *p) break; - if ('<' == *p) - printf("&lt;"); - else if ('>' == *p) - printf("&gt;"); - else if ('&' == *p) - printf("&amp;"); + seq = ++p; + len = a2roffdeco(&deco, &seq, &sz); + + switch (deco) { + case (DECO_RESERVED): + print_res(h, seq, sz); + break; + case (DECO_SPECIAL): + print_spec(h, seq, sz); + break; + default: + break; + } + + p += len - 1; + + if (DECO_NOSPACE == deco && '\0' == *(p + 1)) + nospace = 1; } + + return(nospace); } @@ -366,17 +300,12 @@ print_otag(struct html *h, enum htmltag tag, for (i = 0; i < sz; i++) { printf(" %s=\"", htmlattrs[p[i].key]); assert(p->val); - print_encode(h, p[i].val); + (void)print_encode(h, p[i].val); putchar('\"'); } putchar('>'); h->flags |= HTML_NOSPACE; - if (HTML_CLRLINE & htmltags[tag].flags) - h->flags |= HTML_NEWLINE; - else - h->flags &= ~HTML_NEWLINE; - return(t); } @@ -389,10 +318,8 @@ print_ctag(struct html *h, enum htmltag tag) printf("</%s>", htmltags[tag].name); if (HTML_CLRLINE & htmltags[tag].flags) { h->flags |= HTML_NOSPACE; - h->flags |= HTML_NEWLINE; putchar('\n'); - } else - h->flags &= ~HTML_NEWLINE; + } } @@ -438,11 +365,9 @@ print_text(struct html *h, const char *p) if ( ! (h->flags & HTML_NOSPACE)) putchar(' '); - h->flags &= ~HTML_NOSPACE; - h->flags &= ~HTML_NEWLINE; - - if (p) - print_encode(h, p); + assert(p); + if ( ! 
print_encode(h, p)) + h->flags &= ~HTML_NOSPACE; if (*p && 0 == *(p + 1)) switch (*p) {