=================================================================== RCS file: /cvs/mandoc/term_ascii.c,v retrieving revision 1.58 retrieving revision 1.67 diff -u -p -r1.58 -r1.67 --- mandoc/term_ascii.c 2017/06/14 14:24:20 1.58 +++ mandoc/term_ascii.c 2022/08/15 18:12:30 1.67 @@ -1,7 +1,7 @@ -/* $Id: term_ascii.c,v 1.58 2017/06/14 14:24:20 schwarze Exp $ */ +/* $Id: term_ascii.c,v 1.67 2022/08/15 18:12:30 schwarze Exp $ */ /* * Copyright (c) 2010, 2011 Kristaps Dzonsons - * Copyright (c) 2014, 2015, 2017 Ingo Schwarze + * Copyright (c) 2014,2015,2017,2018,2020 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -21,11 +21,13 @@ #include #if HAVE_WCHAR +#include #include #endif #include #include #include +#include #include #if HAVE_WCHAR #include @@ -88,7 +90,7 @@ ascii_init(enum termenc enc, const struct manoutput *o p->width = ascii_width; #if HAVE_WCHAR - if (TERMENC_ASCII != enc) { + if (enc != TERMENC_ASCII) { /* * Do not change any of this to LC_ALL. It might break @@ -97,11 +99,21 @@ ascii_init(enum termenc enc, const struct manoutput *o * worst case, it might even cause buffer overflows. */ - v = TERMENC_LOCALE == enc ? + v = enc == TERMENC_LOCALE ? setlocale(LC_CTYPE, "") : setlocale(LC_CTYPE, UTF8_LOCALE); - if (NULL != v && MB_CUR_MAX > 1) { - p->enc = enc; + + /* + * We only support UTF-8, + * so revert to ASCII for anything else. + */ + + if (v != NULL && + strcmp(nl_langinfo(CODESET), "UTF-8") != 0) + v = setlocale(LC_CTYPE, "C"); + + if (v != NULL && MB_CUR_MAX > 1) { + p->enc = TERMENC_UTF8; p->advance = locale_advance; p->endline = locale_endline; p->letter = locale_letter; @@ -121,6 +133,8 @@ ascii_init(enum termenc enc, const struct manoutput *o if (outopts->synopsisonly) p->synopsisonly = 1; + assert(p->defindent < UINT16_MAX); + assert(p->defrmargin < UINT16_MAX); return p; } @@ -159,6 +173,8 @@ ascii_setwidth(struct termp *p, int iop, int width) p->defrmargin -= width; else p->defrmargin = 0; + if (p->defrmargin > 1000) + p->defrmargin = 1000; p->lastrmargin = p->tcol->rmargin; p->tcol->rmargin = p->maxrmargin = p->defrmargin; } @@ -180,8 +196,7 @@ terminal_sepline(void *arg) static size_t ascii_width(const struct termp *p, int c) { - - return 1; + return c != ASCII_BREAK && c != ASCII_NBRZW; } void @@ -217,7 +232,10 @@ ascii_endline(struct termp *p) { p->line++; - p->tcol->offset -= p->ti; + if ((int)p->tcol->offset > p->ti) + p->tcol->offset -= p->ti; + else + p->tcol->offset = 0; p->ti = 0; putchar('\n'); } @@ -227,6 +245,14 @@ ascii_advance(struct termp *p, size_t len) { size_t i; + /* + * XXX We used to have "assert(len < UINT16_MAX)" here. + * that is not quite right because the input document + * can trigger that by merely providing large input. + * For now, simply truncate. + */ + if (len > 256) + len = 256; for (i = 0; i < len; i++) putchar(' '); } @@ -294,17 +320,17 @@ ascii_uc2str(int uc) "<88>", "<89>", "<8A>", "<8B>", "<8C>", "<8D>", "<8E>", "<8F>", "<90>", "<91>", "<92>", "<93>", "<94>", "<95>", "<96>", "<97>", "<98>", "<99>", "<9A>", "<9B>", "<9C>", "<9D>", "<9E>", "<9F>", - nbrsp, "!", "/\bc", "GBP", "o\bx", "=\bY", "|", "", + nbrsp, "!", "/\bc", "-\bL", "o\bx", "=\bY", "|", "
", "\"", "(C)", "_\ba", "<<", "~", "", "(R)", "-", - "","+-", "2", "3", "'", ",\bu", "",".", - ",", "1", "_\bo", ">>", "1/4", "1/2", "3/4", "?", + "","+-","^2", "^3", "'","","",".", + ",", "^1", "_\bo", ">>", "1/4", "1/2", "3/4", "?", "`\bA", "'\bA", "^\bA", "~\bA", "\"\bA","o\bA", "AE", ",\bC", "`\bE", "'\bE", "^\bE", "\"\bE","`\bI", "'\bI", "^\bI", "\"\bI", - "-\bD", "~\bN", "`\bO", "'\bO", "^\bO", "~\bO", "\"\bO","x", + "Dh", "~\bN", "`\bO", "'\bO", "^\bO", "~\bO", "\"\bO","x", "/\bO", "`\bU", "'\bU", "^\bU", "\"\bU","'\bY", "Th", "ss", "`\ba", "'\ba", "^\ba", "~\ba", "\"\ba","o\ba", "ae", ",\bc", "`\be", "'\be", "^\be", "\"\be","`\bi", "'\bi", "^\bi", "\"\bi", - "d", "~\bn", "`\bo", "'\bo", "^\bo", "~\bo", "\"\bo","-:-", + "dh", "~\bn", "`\bo", "'\bo", "^\bo", "~\bo", "\"\bo","/", "/\bo", "`\bu", "'\bu", "^\bu", "\"\bu","'\by", "th", "\"\by", "A", "a", "A", "a", "A", "a", "'\bC", "'\bc", "^\bC", "^\bc", "C", "c", "C", "c", "D", "d", @@ -364,6 +390,14 @@ locale_advance(struct termp *p, size_t len) { size_t i; + /* + * XXX We used to have "assert(len < UINT16_MAX)" here. + * that is not quite right because the input document + * can trigger that by merely providing large input. + * For now, simply truncate. + */ + if (len > 256) + len = 256; for (i = 0; i < len; i++) putwchar(L' '); } @@ -373,7 +407,10 @@ locale_endline(struct termp *p) { p->line++; - p->tcol->offset -= p->ti; + if ((int)p->tcol->offset > p->ti) + p->tcol->offset -= p->ti; + else + p->tcol->offset = 0; p->ti = 0; putwchar(L'\n'); }