=================================================================== RCS file: /cvs/mandoc/term_ascii.c,v retrieving revision 1.39 retrieving revision 1.67 diff -u -p -r1.39 -r1.67 --- mandoc/term_ascii.c 2014/10/28 18:49:33 1.39 +++ mandoc/term_ascii.c 2022/08/15 18:12:30 1.67 @@ -1,15 +1,15 @@ -/* $Id: term_ascii.c,v 1.39 2014/10/28 18:49:33 schwarze Exp $ */ +/* $Id: term_ascii.c,v 1.67 2022/08/15 18:12:30 schwarze Exp $ */ /* * Copyright (c) 2010, 2011 Kristaps Dzonsons - * Copyright (c) 2014 Ingo Schwarze + * Copyright (c) 2014,2015,2017,2018,2020 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF @@ -21,11 +21,13 @@ #include #if HAVE_WCHAR +#include #include #endif #include #include #include +#include #include #if HAVE_WCHAR #include @@ -35,11 +37,11 @@ #include "mandoc_aux.h" #include "out.h" #include "term.h" +#include "manconf.h" #include "main.h" -static struct termp *ascii_init(enum termenc, - const struct mchars *, char *); -static double ascii_hspan(const struct termp *, +static struct termp *ascii_init(enum termenc, const struct manoutput *); +static int ascii_hspan(const struct termp *, const struct roffsu *); static size_t ascii_width(const struct termp *, int); static void ascii_advance(struct termp *, size_t); @@ -47,7 +49,7 @@ static void ascii_begin(struct termp *); static void ascii_end(struct termp *); static void ascii_endline(struct termp *); static void ascii_letter(struct termp *, int); -static void ascii_setwidth(struct termp *, int, size_t); +static void ascii_setwidth(struct termp *, int, int); #if HAVE_WCHAR static void locale_advance(struct termp *, size_t); @@ -58,17 +60,22 @@ static size_t locale_width(const struct termp *, in static struct termp * -ascii_init(enum termenc enc, const struct mchars *mchars, char *outopts) +ascii_init(enum termenc enc, const struct manoutput *outopts) { - const char *toks[5]; +#if HAVE_WCHAR char *v; +#endif struct termp *p; - p = mandoc_calloc(1, sizeof(struct termp)); + p = mandoc_calloc(1, sizeof(*p)); + p->tcol = p->tcols = mandoc_calloc(1, sizeof(*p->tcol)); + p->maxtcol = 1; - p->symtab = mchars; - p->tabwidth = 5; + p->line = 1; p->defrmargin = p->lastrmargin = 78; + p->fontq = mandoc_reallocarray(NULL, + (p->fontsz = 8), sizeof(*p->fontq)); + p->fontq[0] = p->fontl = TERMFONT_NONE; p->begin = ascii_begin; p->end = ascii_end; @@ -83,12 +90,30 @@ ascii_init(enum termenc enc, const struct mchars *mcha p->width = ascii_width; #if HAVE_WCHAR - if (TERMENC_ASCII != enc) { - v = TERMENC_LOCALE == enc ? - setlocale(LC_ALL, "") : - setlocale(LC_CTYPE, "en_US.UTF-8"); - if (NULL != v && MB_CUR_MAX > 1) { - p->enc = enc; + if (enc != TERMENC_ASCII) { + + /* + * Do not change any of this to LC_ALL. It might break + * the formatting by subtly changing the behaviour of + * various functions, for example strftime(3). As a + * worst case, it might even cause buffer overflows. + */ + + v = enc == TERMENC_LOCALE ? + setlocale(LC_CTYPE, "") : + setlocale(LC_CTYPE, UTF8_LOCALE); + + /* + * We only support UTF-8, + * so revert to ASCII for anything else. + */ + + if (v != NULL && + strcmp(nl_langinfo(CODESET), "UTF-8") != 0) + v = setlocale(LC_CTYPE, "C"); + + if (v != NULL && MB_CUR_MAX > 1) { + p->enc = TERMENC_UTF8; p->advance = locale_advance; p->endline = locale_endline; p->letter = locale_letter; @@ -97,83 +122,81 @@ ascii_init(enum termenc enc, const struct mchars *mcha } #endif - toks[0] = "indent"; - toks[1] = "width"; - toks[2] = "mdoc"; - toks[3] = "synopsis"; - toks[4] = NULL; + if (outopts->mdoc) { + p->mdocstyle = 1; + p->defindent = 5; + } + if (outopts->indent) + p->defindent = outopts->indent; + if (outopts->width) + p->defrmargin = outopts->width; + if (outopts->synopsisonly) + p->synopsisonly = 1; - while (outopts && *outopts) - switch (getsubopt(&outopts, UNCONST(toks), &v)) { - case 0: - p->defindent = (size_t)atoi(v); - break; - case 1: - p->defrmargin = (size_t)atoi(v); - break; - case 2: - /* - * Temporary, undocumented mode - * to imitate mdoc(7) output style. - */ - p->mdocstyle = 1; - p->defindent = 5; - break; - case 3: - p->synopsisonly = 1; - break; - default: - break; - } - - /* Enforce a lower boundary. */ - if (p->defrmargin < 58) - p->defrmargin = 58; - - return(p); + assert(p->defindent < UINT16_MAX); + assert(p->defrmargin < UINT16_MAX); + return p; } void * -ascii_alloc(const struct mchars *mchars, char *outopts) +ascii_alloc(const struct manoutput *outopts) { - return(ascii_init(TERMENC_ASCII, mchars, outopts)); + return ascii_init(TERMENC_ASCII, outopts); } void * -utf8_alloc(const struct mchars *mchars, char *outopts) +utf8_alloc(const struct manoutput *outopts) { - return(ascii_init(TERMENC_UTF8, mchars, outopts)); + return ascii_init(TERMENC_UTF8, outopts); } void * -locale_alloc(const struct mchars *mchars, char *outopts) +locale_alloc(const struct manoutput *outopts) { - return(ascii_init(TERMENC_LOCALE, mchars, outopts)); + return ascii_init(TERMENC_LOCALE, outopts); } static void -ascii_setwidth(struct termp *p, int iop, size_t width) +ascii_setwidth(struct termp *p, int iop, int width) { - p->rmargin = p->defrmargin; - if (0 < iop) + width /= 24; + p->tcol->rmargin = p->defrmargin; + if (iop > 0) p->defrmargin += width; - else if (0 > iop) + else if (iop == 0) + p->defrmargin = width ? (size_t)width : p->lastrmargin; + else if (p->defrmargin > (size_t)width) p->defrmargin -= width; else - p->defrmargin = width ? width : p->lastrmargin; - p->lastrmargin = p->rmargin; - p->rmargin = p->maxrmargin = p->defrmargin; + p->defrmargin = 0; + if (p->defrmargin > 1000) + p->defrmargin = 1000; + p->lastrmargin = p->tcol->rmargin; + p->tcol->rmargin = p->maxrmargin = p->defrmargin; } +void +terminal_sepline(void *arg) +{ + struct termp *p; + size_t i; + + p = (struct termp *)arg; + (*p->endline)(p); + for (i = 0; i < p->defrmargin; i++) + (*p->letter)(p, '-'); + (*p->endline)(p); + (*p->endline)(p); +} + static size_t ascii_width(const struct termp *p, int c) { - - return(1); + return c != ASCII_BREAK && c != ASCII_NBRZW; } void @@ -208,6 +231,12 @@ static void ascii_endline(struct termp *p) { + p->line++; + if ((int)p->tcol->offset > p->ti) + p->tcol->offset -= p->ti; + else + p->tcol->offset = 0; + p->ti = 0; putchar('\n'); } @@ -216,56 +245,54 @@ ascii_advance(struct termp *p, size_t len) { size_t i; + /* + * XXX We used to have "assert(len < UINT16_MAX)" here. + * that is not quite right because the input document + * can trigger that by merely providing large input. + * For now, simply truncate. + */ + if (len > 256) + len = 256; for (i = 0; i < len; i++) putchar(' '); } -static double +static int ascii_hspan(const struct termp *p, const struct roffsu *su) { double r; - /* - * Approximate based on character width. - * None of these will be actually correct given that an inch on - * the screen depends on character size, terminal, etc., etc. - */ switch (su->unit) { case SCALE_BU: - r = su->scale * 10.0 / 240.0; + r = su->scale; break; case SCALE_CM: - r = su->scale * 10.0 / 2.54; + r = su->scale * 240.0 / 2.54; break; case SCALE_FS: - r = su->scale * 2730.666; + r = su->scale * 65536.0; break; case SCALE_IN: - r = su->scale * 10.0; + r = su->scale * 240.0; break; case SCALE_MM: - r = su->scale / 100.0; + r = su->scale * 0.24; break; + case SCALE_VS: case SCALE_PC: - r = su->scale * 10.0 / 6.0; + r = su->scale * 40.0; break; case SCALE_PT: - r = su->scale * 10.0 / 72.0; + r = su->scale * 10.0 / 3.0; break; - case SCALE_VS: - r = su->scale * 2.0 - 1.0; - break; case SCALE_EN: - /* FALLTHROUGH */ case SCALE_EM: - r = su->scale; + r = su->scale * 24.0; break; default: abort(); - /* NOTREACHED */ } - - return(r); + return r > 0.0 ? r + 0.01 : r - 0.01; } const char * @@ -292,18 +319,18 @@ ascii_uc2str(int uc) "<80>", "<81>", "<82>", "<83>", "<84>", "<85>", "<86>", "<87>", "<88>", "<89>", "<8A>", "<8B>", "<8C>", "<8D>", "<8E>", "<8F>", "<90>", "<91>", "<92>", "<93>", "<94>", "<95>", "<96>", "<97>", - "<99>", "<99>", "<9A>", "<9B>", "<9C>", "<9D>", "<9E>", "<9F>", - nbrsp, "!", "/\bc", "GBP", "o\bx", "=\bY", "|", "", + "<98>", "<99>", "<9A>", "<9B>", "<9C>", "<9D>", "<9E>", "<9F>", + nbrsp, "!", "/\bc", "-\bL", "o\bx", "=\bY", "|", "
", "\"", "(C)", "_\ba", "<<", "~", "", "(R)", "-", - "","+-", "2", "3", "'", ",\bu", "",".", - ",", "1", "_\bo", ">>", "1/4", "1/2", "3/4", "?", + "","+-","^2", "^3", "'","","",".", + ",", "^1", "_\bo", ">>", "1/4", "1/2", "3/4", "?", "`\bA", "'\bA", "^\bA", "~\bA", "\"\bA","o\bA", "AE", ",\bC", "`\bE", "'\bE", "^\bE", "\"\bE","`\bI", "'\bI", "^\bI", "\"\bI", - "-\bD", "~\bN", "`\bO", "'\bO", "^\bO", "~\bO", "\"\bO","x", + "Dh", "~\bN", "`\bO", "'\bO", "^\bO", "~\bO", "\"\bO","x", "/\bO", "`\bU", "'\bU", "^\bU", "\"\bU","'\bY", "Th", "ss", "`\ba", "'\ba", "^\ba", "~\ba", "\"\ba","o\ba", "ae", ",\bc", "`\be", "'\be", "^\be", "\"\be","`\bi", "'\bi", "^\bi", "\"\bi", - "d", "~\bn", "`\bo", "'\bo", "^\bo", "~\bo", "\"\bo","-:-", + "dh", "~\bn", "`\bo", "'\bo", "^\bo", "~\bo", "\"\bo","/", "/\bo", "`\bu", "'\bu", "^\bu", "\"\bu","'\by", "th", "\"\by", "A", "a", "A", "a", "A", "a", "'\bC", "'\bc", "^\bC", "^\bc", "C", "c", "C", "c", "D", "d", @@ -340,8 +367,8 @@ ascii_uc2str(int uc) assert(uc >= 0); if ((size_t)uc < sizeof(tab)/sizeof(tab[0])) - return(tab[uc]); - return(mchars_uc2str(uc)); + return tab[uc]; + return mchars_uc2str(uc); } #if HAVE_WCHAR @@ -355,7 +382,7 @@ locale_width(const struct termp *p, int c) rc = wcwidth(c); if (rc < 0) rc = 0; - return(rc); + return rc; } static void @@ -363,6 +390,14 @@ locale_advance(struct termp *p, size_t len) { size_t i; + /* + * XXX We used to have "assert(len < UINT16_MAX)" here. + * that is not quite right because the input document + * can trigger that by merely providing large input. + * For now, simply truncate. + */ + if (len > 256) + len = 256; for (i = 0; i < len; i++) putwchar(L' '); } @@ -371,6 +406,12 @@ static void locale_endline(struct termp *p) { + p->line++; + if ((int)p->tcol->offset > p->ti) + p->tcol->offset -= p->ti; + else + p->tcol->offset = 0; + p->ti = 0; putwchar(L'\n'); }