===================================================================
RCS file: /cvs/mandoc/term_ascii.c,v
retrieving revision 1.54
retrieving revision 1.68
diff -u -p -r1.54 -r1.68
--- mandoc/term_ascii.c	2016/07/31 09:29:13	1.54
+++ mandoc/term_ascii.c	2022/08/16 17:45:55	1.68
@@ -1,7 +1,7 @@
-/*	$Id: term_ascii.c,v 1.54 2016/07/31 09:29:13 schwarze Exp $ */
+/* $Id: term_ascii.c,v 1.68 2022/08/16 17:45:55 schwarze Exp $ */
 /*
  * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2014,2015,2017,2018,2020 Ingo Schwarze <schwarze@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -21,11 +21,13 @@
 
 #include <assert.h>
 #if HAVE_WCHAR
+#include <langinfo.h>
 #include <locale.h>
 #endif
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 #include <unistd.h>
 #if HAVE_WCHAR
 #include <wchar.h>
@@ -65,13 +67,14 @@ ascii_init(enum termenc enc, const struct manoutput *o
 #endif
 	struct termp	*p;
 
-	p = mandoc_calloc(1, sizeof(struct termp));
+	p = mandoc_calloc(1, sizeof(*p));
+	p->tcol = p->tcols = mandoc_calloc(1, sizeof(*p->tcol));
+	p->maxtcol = 1;
 
 	p->line = 1;
-	p->tabwidth = 5;
 	p->defrmargin = p->lastrmargin = 78;
 	p->fontq = mandoc_reallocarray(NULL,
-	     (p->fontsz = 8), sizeof(enum termfont));
+	     (p->fontsz = 8), sizeof(*p->fontq));
 	p->fontq[0] = p->fontl = TERMFONT_NONE;
 
 	p->begin = ascii_begin;
@@ -87,7 +90,7 @@ ascii_init(enum termenc enc, const struct manoutput *o
 	p->width = ascii_width;
 
 #if HAVE_WCHAR
-	if (TERMENC_ASCII != enc) {
+	if (enc != TERMENC_ASCII) {
 
 		/*
 		 * Do not change any of this to LC_ALL.  It might break
@@ -96,11 +99,21 @@ ascii_init(enum termenc enc, const struct manoutput *o
 		 * worst case, it might even cause buffer overflows.
 		 */
 
-		v = TERMENC_LOCALE == enc ?
+		v = enc == TERMENC_LOCALE ?
 		    setlocale(LC_CTYPE, "") :
 		    setlocale(LC_CTYPE, UTF8_LOCALE);
-		if (NULL != v && MB_CUR_MAX > 1) {
-			p->enc = enc;
+
+		/*
+		 * We only support UTF-8,
+		 * so revert to ASCII for anything else.
+		 */
+
+		if (v != NULL &&
+		    strcmp(nl_langinfo(CODESET), "UTF-8") != 0)
+			v = setlocale(LC_CTYPE, "C");
+
+		if (v != NULL && MB_CUR_MAX > 1) {
+			p->enc = TERMENC_UTF8;
 			p->advance = locale_advance;
 			p->endline = locale_endline;
 			p->letter = locale_letter;
@@ -120,6 +133,8 @@ ascii_init(enum termenc enc, const struct manoutput *o
 	if (outopts->synopsisonly)
 		p->synopsisonly = 1;
 
+	assert(p->defindent < UINT16_MAX);
+	assert(p->defrmargin < UINT16_MAX);
 	return p;
 }
 
@@ -149,7 +164,7 @@ ascii_setwidth(struct termp *p, int iop, int width)
 {
 
 	width /= 24;
-	p->rmargin = p->defrmargin;
+	p->tcol->rmargin = p->defrmargin;
 	if (iop > 0)
 		p->defrmargin += width;
 	else if (iop == 0)
@@ -158,8 +173,10 @@ ascii_setwidth(struct termp *p, int iop, int width)
 		p->defrmargin -= width;
 	else
 		p->defrmargin = 0;
-	p->lastrmargin = p->rmargin;
-	p->rmargin = p->maxrmargin = p->defrmargin;
+	if (p->defrmargin > 1000)
+		p->defrmargin = 1000;
+	p->lastrmargin = p->tcol->rmargin;
+	p->tcol->rmargin = p->maxrmargin = p->defrmargin;
 }
 
 void
@@ -179,8 +196,7 @@ terminal_sepline(void *arg)
 static size_t
 ascii_width(const struct termp *p, int c)
 {
-
-	return 1;
+	return c != ASCII_BREAK && c != ASCII_NBRZW && c != ASCII_TABREF;
 }
 
 void
@@ -216,6 +232,11 @@ ascii_endline(struct termp *p)
 {
 
 	p->line++;
+	if ((int)p->tcol->offset > p->ti)
+		p->tcol->offset -= p->ti;
+	else
+		p->tcol->offset = 0;
+	p->ti = 0;
 	putchar('\n');
 }
 
@@ -224,6 +245,14 @@ ascii_advance(struct termp *p, size_t len)
 {
 	size_t		i;
 
+	/*
+	 * XXX We used to have "assert(len < UINT16_MAX)" here.
+	 * That is not quite right because the input document
+	 * can trigger that by merely providing large input.
+	 * For now, simply truncate.
+	 */
+	if (len > 256)
+		len = 256;
 	for (i = 0; i < len; i++)
 		putchar(' ');
 }
@@ -290,18 +319,18 @@ ascii_uc2str(int uc)
 	"<80>",	"<81>",	"<82>",	"<83>",	"<84>",	"<85>",	"<86>",	"<87>",
 	"<88>",	"<89>",	"<8A>",	"<8B>",	"<8C>",	"<8D>",	"<8E>",	"<8F>",
 	"<90>",	"<91>",	"<92>",	"<93>",	"<94>",	"<95>",	"<96>",	"<97>",
-	"<99>",	"<99>",	"<9A>",	"<9B>",	"<9C>",	"<9D>",	"<9E>",	"<9F>",
-	nbrsp,	"!",	"/\bc",	"GBP",	"o\bx",	"=\bY",	"|",	"<sec>",
+	"<98>",	"<99>",	"<9A>",	"<9B>",	"<9C>",	"<9D>",	"<9E>",	"<9F>",
+	nbrsp,	"!",	"/\bc",	"-\bL",	"o\bx",	"=\bY",	"|",	"<section>",
 	"\"",	"(C)",	"_\ba",	"<<",	"~",	"",	"(R)",	"-",
-	"<deg>","+-",	"2",	"3",	"'",	",\bu",	"<par>",".",
-	",",	"1",	"_\bo",	">>",	"1/4",	"1/2",	"3/4",	"?",
+	"<degree>","+-","^2",	"^3",	"'","<micro>","<paragraph>",".",
+	",",	"^1",	"_\bo",	">>",	"1/4",	"1/2",	"3/4",	"?",
 	"`\bA",	"'\bA",	"^\bA",	"~\bA",	"\"\bA","o\bA",	"AE",	",\bC",
 	"`\bE",	"'\bE",	"^\bE",	"\"\bE","`\bI",	"'\bI",	"^\bI",	"\"\bI",
-	"-\bD",	"~\bN",	"`\bO",	"'\bO",	"^\bO",	"~\bO",	"\"\bO","x",
+	"Dh",	"~\bN",	"`\bO",	"'\bO",	"^\bO",	"~\bO",	"\"\bO","x",
 	"/\bO",	"`\bU",	"'\bU",	"^\bU",	"\"\bU","'\bY",	"Th",	"ss",
 	"`\ba",	"'\ba",	"^\ba",	"~\ba",	"\"\ba","o\ba",	"ae",	",\bc",
 	"`\be",	"'\be",	"^\be",	"\"\be","`\bi",	"'\bi",	"^\bi",	"\"\bi",
-	"d",	"~\bn",	"`\bo",	"'\bo",	"^\bo",	"~\bo",	"\"\bo","-:-",
+	"dh",	"~\bn",	"`\bo",	"'\bo",	"^\bo",	"~\bo",	"\"\bo","/",
 	"/\bo",	"`\bu",	"'\bu",	"^\bu",	"\"\bu","'\by",	"th",	"\"\by",
 	"A",	"a",	"A",	"a",	"A",	"a",	"'\bC",	"'\bc",
 	"^\bC",	"^\bc",	"C",	"c",	"C",	"c",	"D",	"d",
@@ -361,6 +390,14 @@ locale_advance(struct termp *p, size_t len)
 {
 	size_t		i;
 
+	/*
+	 * XXX We used to have "assert(len < UINT16_MAX)" here.
+	 * That is not quite right because the input document
+	 * can trigger that by merely providing large input.
+	 * For now, simply truncate.
+	 */
+	if (len > 256)
+		len = 256;
 	for (i = 0; i < len; i++)
 		putwchar(L' ');
 }
@@ -370,6 +407,11 @@ locale_endline(struct termp *p)
 {
 
 	p->line++;
+	if ((int)p->tcol->offset > p->ti)
+		p->tcol->offset -= p->ti;
+	else 
+		p->tcol->offset = 0;
+	p->ti = 0;
 	putwchar(L'\n');
 }