=================================================================== RCS file: /cvs/mandoc/html.c,v retrieving revision 1.176 retrieving revision 1.178 diff -u -p -r1.176 -r1.178 --- mandoc/html.c 2014/10/10 15:26:29 1.176 +++ mandoc/html.c 2014/10/27 13:31:04 1.178 @@ -1,4 +1,4 @@ -/* $Id: html.c,v 1.176 2014/10/10 15:26:29 schwarze Exp $ */ +/* $Id: html.c,v 1.178 2014/10/27 13:31:04 schwarze Exp $ */ /* * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons * Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze @@ -437,8 +437,18 @@ print_encode(struct html *h, const char *p, int norecu case ESCAPE_UNICODE: /* Skip past "u" header. */ c = mchars_num2uc(seq + 1, len - 1); - if ('\0' != c) - printf("&#x%x;", c); + + /* + * XXX Security warning: + * For now, forbid Unicode obfuscation of ASCII + * characters. An audit of the callers is + * required before this can be removed. + */ + + if (c < 0x80) + c = 0xFFFD; + + printf("&#x%x;", c); break; case ESCAPE_NUMBERED: c = mchars_num2char(seq, len); @@ -447,11 +457,12 @@ print_encode(struct html *h, const char *p, int norecu break; case ESCAPE_SPECIAL: c = mchars_spec2cp(h->symtab, seq, len); - if (c > 0) + if (c <= 0) + break; + if (c < 0x20 || c > 0x7e) printf("&#%d;", c); - else if (-1 == c && 1 == len && - !print_escape(*seq)) - putchar((int)*seq); + else if ( ! print_escape(c)) + putchar(c); break; case ESCAPE_NOSPACE: if ('\0' == *p)