=================================================================== RCS file: /cvs/mandoc/roff_escape.c,v retrieving revision 1.14 retrieving revision 1.15 diff -u -p -r1.14 -r1.15 --- mandoc/roff_escape.c 2022/06/08 13:23:57 1.14 +++ mandoc/roff_escape.c 2024/05/16 21:23:00 1.15 @@ -1,4 +1,4 @@ -/* $Id: roff_escape.c,v 1.14 2022/06/08 13:23:57 schwarze Exp $ */ +/* $Id: roff_escape.c,v 1.15 2024/05/16 21:23:00 schwarze Exp $ */ /* * Copyright (c) 2011, 2012, 2013, 2014, 2015, 2017, 2018, 2020, 2022 * Ingo Schwarze @@ -467,13 +467,12 @@ roff_escape(const char *buf, const int ln, const int a /* * Unicode escapes are defined in groff as \[u0000] * to \[u10FFFF], where the contained value must be - * a valid Unicode codepoint. Here, however, only - * check the length and range. + * a valid Unicode codepoint. */ if (buf[iarg] != 'u' || argl < 5 || argl > 7) break; - if (argl == 7 && + if (argl == 7 && /* beyond the Unicode range */ (buf[iarg + 1] != '1' || buf[iarg + 2] != '0')) { err = MANDOCERR_ESC_BADCHAR; break; @@ -482,8 +481,9 @@ roff_escape(const char *buf, const int ln, const int a err = MANDOCERR_ESC_BADCHAR; break; } - if (argl == 5 && buf[iarg + 1] == 'D' && - strchr("89ABCDEF", buf[iarg + 2]) != NULL) { + if (argl == 5 && /* UTF-16 surrogate */ + toupper((unsigned char)buf[iarg + 1]) == 'D' && + strchr("89ABCDEFabcdef", buf[iarg + 2]) != NULL) { err = MANDOCERR_ESC_BADCHAR; break; }