=================================================================== RCS file: /cvs/mandoc/chars.c,v retrieving revision 1.39 retrieving revision 1.44 diff -u -p -r1.39 -r1.44 --- mandoc/chars.c 2011/04/30 22:24:31 1.39 +++ mandoc/chars.c 2011/05/17 11:50:20 1.44 @@ -1,4 +1,4 @@ -/* $Id: chars.c,v 1.39 2011/04/30 22:24:31 kristaps Exp $ */ +/* $Id: chars.c,v 1.44 2011/05/17 11:50:20 kristaps Exp $ */ /* * Copyright (c) 2009, 2010 Kristaps Dzonsons * Copyright (c) 2011 Ingo Schwarze @@ -20,11 +20,13 @@ #endif #include +#include #include #include #include #include "mandoc.h" +#include "libmandoc.h" #define PRINT_HI 126 #define PRINT_LO 32 @@ -135,28 +137,38 @@ mchars_res2cp(struct mchars *arg, const char *p, size_ return(ln->unicode); } - /* - * Numbered character to literal character. + * Numbered character string to ASCII codepoint. + * This can only be a printable character (i.e., alnum, punct, space) so + * prevent the character from ruining our state (backspace, newline, and + * so on). + * If the character is illegal, returns '\0'. */ char mchars_num2char(const char *p, size_t sz) { int i; - if (sz > 3) + if ((i = mandoc_strntou(p, sz, 10)) < 0) return('\0'); - - i = atoi(p); - /* - * FIXME: - * This is wrong. Anything could be written here! - * This should be carefully screened for possible characters. - */ - return(i <= 0 || i > 255 ? '\0' : (char)i); + return(isprint(i) ? i : '\0'); } +/* + * Hex character string to Unicode codepoint. + * If the character is illegal, returns '\0'. + */ +int +mchars_num2uc(const char *p, size_t sz) +{ + int i; + if ((i = mandoc_strntou(p, sz, 16)) < 0) + return('\0'); + /* FIXME: make sure we're not in a bogus range. */ + return(i > 0x80 && i <= 0x10FFFF ? i : '\0'); +} + /* * Special character to string array. */ @@ -172,7 +184,6 @@ mchars_spec2str(struct mchars *arg, const char *p, siz *rsz = strlen(ln->ascii); return(ln->ascii); } - /* * Reserved word to string array.