[BACK]Return to out.c CVS log [TXT][DIR] Up to [cvsweb.bsd.lv] / mandoc

File: [cvsweb.bsd.lv] / mandoc / out.c (download)

Revision 1.87, Wed Jul 16 14:33:08 2025 UTC (2 months ago) by schwarze
Branch: MAIN
CVS Tags: HEAD
Changes since 1.86: +42 -21 lines

In 2009, kristaps@ decided that using EN as the basic unit for
horizontal widths on terminal output devices was the simplest option.
It made sense because one EN is the smallest possible horizontal
cursor movement on a terminal.  In 2014, kristaps@ implemented the 'u'
unit (BU, "basic unit") for scaled width input as 1/24 EN, but all
internal variables continued to store widths in units of EN.

This causes compatibility problems with groff, which internally
represents widths in units of BU rather than EN.  Various high-level
macros internally calculate sums of user-controlled widths with a
sub-EN precision.  In mandoc, the coarse internal representation
causes rounding errors, in some cases resulting in wrong indentations.

Fix this by changing many internal variables in the terminal formatter
to store widths in units of BU instead of EN, improving compatibility.

An example of a manual page where mandoc output becomes more
compatible with groff output is clang(1) - a page generated by the
atrocious rst2man(1) tool, which insists, among other insanity, on
indenting by non-integer multiples of EN.

This patch includes the following related changes:
* The termp.advance and endline functions now update termp.viscol.
* Rename termp.width to termp.getwidth to help grep(1).
* Delete term_hen() which becomes the same as term_hspan().
* Delete the unused struct termp_tbl.
* Delete the unused tbl_sulen functions.

In the tbl_term formatter, pass an additional coloff argument to
all the data and fill functions, to deal with mismatches between
the desired and the actual cursor position: the desired position
is now maintained with BU precision, but the actual position is of
course still an integer multiple of EN.

Also simplify tbl_literal() by leaving TERMP_CENTER and TERMP_RIGHT
adjustment to term_flushln(), which already contains code for that
very purpose.

We are really lucky to have the regress/ test suite.  Without it,
tricky and pervasive low-level refactoring of this kind would be
impossible because it would unavoidably cause lots of regressions.

/* $Id: out.c,v 1.87 2025/07/16 14:33:08 schwarze Exp $ */
/*
 * Copyright (c) 2011, 2014, 2015, 2017, 2018, 2019, 2021, 2025
 *               Ingo Schwarze <schwarze@openbsd.org>
 * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include "config.h"

#include <sys/types.h>

#include <assert.h>
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "mandoc_aux.h"
#include "mandoc.h"
#include "tbl.h"
#include "out.h"

/*
 * A group of adjacent columns joined by a horizontal span.
 * tblcalc() collects these in a singly linked list and uses them
 * to widen the participating columns until the span fits.
 */
struct	tbl_colgroup {
	struct tbl_colgroup	*next;     /* Next group in the list, or NULL. */
	size_t			 wanted;   /* Width required; later, width still missing. */
	int			 startcol; /* Index of the first column in the group. */
	int			 endcol;   /* Index of the last column (inclusive). */
};

/*
 * Per-cell width calculation helpers used by tblcalc().
 * Each returns the width of the cell; when the roffcol pointer
 * is not NULL, the column it points to is widened as needed.
 */
static	size_t	tblcalc_data(struct rofftbl *, struct roffcol *,
			const struct tbl_opts *, const struct tbl_dat *,
			size_t);
static	size_t	tblcalc_literal(struct rofftbl *, struct roffcol *,
			const struct tbl_dat *, size_t);
static	size_t	tblcalc_number(struct rofftbl *, struct roffcol *,
			const struct tbl_opts *, const struct tbl_dat *);


/*
 * Convert the textual scaled width at the start of src into *dst.
 * The number is read with strtod(3); one optional unit letter
 * may follow it.  Without a unit letter, the unit def is used,
 * unless def is SCALE_MAX, which means "no default": in that case,
 * the conversion fails.
 * Returns a pointer to the first byte not consumed,
 * or NULL if no number was found or the required unit is missing.
 * On failure, *dst may already have been modified.
 */
const char *
a2roffsu(const char *src, struct roffsu *dst, enum roffscale def)
{
	char		*after;	/* First byte following the number. */

	/* Preset the unit; SCALE_MAX falls back to basic units. */
	if (def == SCALE_MAX)
		dst->unit = SCALE_BU;
	else
		dst->unit = def;

	dst->scale = strtod(src, &after);
	if (after == src)
		return NULL;

	switch (*after) {
	case 'u':
		dst->unit = SCALE_BU;
		break;
	case 'n':
		dst->unit = SCALE_EN;
		break;
	case 'm':
		dst->unit = SCALE_EM;
		break;
	case 'M':
		dst->unit = SCALE_MM;
		break;
	case 'v':
		dst->unit = SCALE_VS;
		break;
	case 'f':
		dst->unit = SCALE_FS;
		break;
	case 'i':
		dst->unit = SCALE_IN;
		break;
	case 'c':
		dst->unit = SCALE_CM;
		break;
	case 'p':
		dst->unit = SCALE_PT;
		break;
	case 'P':
		dst->unit = SCALE_PC;
		break;
	default:
		/* No unit letter: consume nothing beyond the number. */
		if (def == SCALE_MAX)
			return NULL;
		dst->unit = def;
		return after;
	}

	/* Skip the unit letter that was just recognized. */
	return after + 1;
}

/*
 * Calculate the abstract widths and decimal positions of columns in a
 * table.  This routine allocates the columns structures then runs over
 * all rows and cells in the table.  The function pointers in "tbl" are
 * used for the actual width calculations.
 *
 * tbl:      provides the len and slen measuring callbacks and their
 *           argument; on return, tbl->cols holds the results.
 *           tbl->cols must be NULL on entry.
 * sp_first: first span of the table; all following spans are
 *           reached through the next pointers.
 * offset:   subtracted, together with the width of the columns not
 *           to maximize, from rmargin to find the space available
 *           for columns that are to be maximized.
 * rmargin:  right margin; if zero, columns are not maximized and
 *           text blocks without an explicit width are not limited.
 */
void
tblcalc(struct rofftbl *tbl, const struct tbl_span *sp_first,
    size_t offset, size_t rmargin)
{
	const struct tbl_opts	*opts;
	const struct tbl_span	*sp;
	const struct tbl_dat	*dp;
	struct roffcol		*col;
	struct tbl_colgroup	*first_group, **gp, *g;

	/* Widths in basic units. */
	size_t	*colwidth; /* Widths of all columns. */
	size_t	 min1;     /* Width of the narrowest column. */
	size_t	 min2;     /* Width of the second narrowest column. */
	size_t	 wanted;   /* For any of the narrowest columns. */
	size_t	 xwidth;   /* Total width of columns not to expand. */
	size_t	 ewidth;   /* Width of widest column to equalize. */
	size_t	 width;    /* Width of the data in basic units. */
	size_t	 enw;      /* Width of one EN unit. */

	int	 icol;     /* Column number, starting at zero. */
	int	 maxcol;   /* Number of last column. */
	int	 necol;    /* Number of columns to equalize. */
	int	 nxcol;    /* Number of columns to expand. */
	int	 done;	   /* Boolean: this group is wide enough. */
	int	 quirkcol; /* Ordinal of the x-column to narrow, or -1. */

	/*
	 * Allocate the master column specifiers.  These will hold the
	 * widths and decimal positions for all cells in the column.  It
	 * must be freed and nullified by the caller.
	 */

	assert(tbl->cols == NULL);
	tbl->cols = mandoc_calloc((size_t)sp_first->opts->cols,
	    sizeof(struct roffcol));
	opts = sp_first->opts;

	maxcol = -1;
	first_group = NULL;
	enw = (*tbl->len)(1, tbl->arg);
	for (sp = sp_first; sp != NULL; sp = sp->next) {
		/* Only data spans contribute to the column widths. */
		if (sp->pos != TBL_SPAN_DATA)
			continue;

		/*
		 * Account for the data cells in the layout, matching it
		 * to data cells in the data section.
		 */

		for (dp = sp->first; dp != NULL; dp = dp->next) {
			icol = dp->layout->col;
			/*
			 * Mark each column seen for the first time as
			 * having no spacing specified yet (SIZE_MAX).
			 */
			while (maxcol < icol + dp->hspans)
				tbl->cols[++maxcol].spacing = SIZE_MAX;
			col = tbl->cols + icol;
			col->flags |= dp->layout->flags;
			/* Such cells do not influence the width. */
			if (dp->layout->flags & TBL_CELL_WIGN)
				continue;

			/* Handle explicit width specifications. */
			if (col->width < dp->layout->width)
				col->width = dp->layout->width;
			/* Keep the largest spacing specified so far. */
			if (dp->layout->spacing != SIZE_MAX &&
			    (col->spacing == SIZE_MAX ||
			     col->spacing < dp->layout->spacing))
				col->spacing = dp->layout->spacing;

			/*
			 * Calculate an automatic width.
			 * Except for spanning cells, apply it.
			 * The last argument is the maximum line width
			 * for text blocks: the explicit layout width if
			 * there is one, or else a multiple of the EN
			 * width derived from the right margin and the
			 * number of columns; zero means no limit.
			 */

			width = tblcalc_data(tbl,
			    dp->hspans == 0 ? col : NULL,
			    opts, dp,
			    dp->block == 0 ? 0 :
			    dp->layout->width ? dp->layout->width :
			    rmargin ? (rmargin / enw + sp->opts->cols / 2) /
			    (sp->opts->cols + 1) * enw : 0);
			if (dp->hspans == 0)
				continue;

			/*
			 * Build a singly linked list
			 * of all groups of columns joined by spans,
			 * recording the minimum width for each group.
			 */

			gp = &first_group;
			while (*gp != NULL && ((*gp)->startcol != icol ||
			    (*gp)->endcol != icol + dp->hspans))
				gp = &(*gp)->next;
			if (*gp == NULL) {
				/* New group: append it to the list. */
				g = mandoc_malloc(sizeof(*g));
				g->next = *gp;
				g->wanted = width;
				g->startcol = icol;
				g->endcol = icol + dp->hspans;
				*gp = g;
			} else if ((*gp)->wanted < width)
				(*gp)->wanted = width;
		}
	}

	/*
	 * The minimum width of columns explicitly specified
	 * in the layout is 1n.
	 */

	if (maxcol < sp_first->opts->cols - 1)
		maxcol = sp_first->opts->cols - 1;
	for (icol = 0; icol <= maxcol; icol++) {
		col = tbl->cols + icol;
		if (col->width < enw)
			col->width = enw;

		/*
		 * Column spacings are needed for span width
		 * calculations, so set the default values now.
		 * The last column always gets the default.
		 */

		if (col->spacing == SIZE_MAX || icol == maxcol)
			col->spacing = 3;
	}

	/*
	 * Replace the minimum widths with the missing widths,
	 * and dismiss groups that are already wide enough.
	 */

	gp = &first_group;
	while ((g = *gp) != NULL) {
		done = 0;
		for (icol = g->startcol; icol <= g->endcol; icol++) {
			/*
			 * Each column contributes its own width plus,
			 * except for the last one in the group,
			 * the spacing that follows it.
			 */
			width = tbl->cols[icol].width;
			if (icol < g->endcol)
				width += (*tbl->len)(tbl->cols[icol].spacing,
				    tbl->arg);
			if (g->wanted <= width) {
				done = 1;
				break;
			} else
				g->wanted -= width;
		}
		if (done) {
			/* Unlink and free the satisfied group. */
			*gp = g->next;
			free(g);
		} else
			gp = &g->next;
	}

	/*
	 * Widen columns step by step until no group is left:
	 * each pass raises the narrowest participating columns.
	 */

	colwidth = mandoc_reallocarray(NULL, maxcol + 1, sizeof(*colwidth));
	while (first_group != NULL) {

		/*
		 * Rebuild the array of the widths of all columns
		 * participating in spans that require expansion.
		 * Columns not participating keep SIZE_MAX.
		 */

		for (icol = 0; icol <= maxcol; icol++)
			colwidth[icol] = SIZE_MAX;
		for (g = first_group; g != NULL; g = g->next)
			for (icol = g->startcol; icol <= g->endcol; icol++)
				colwidth[icol] = tbl->cols[icol].width;

		/*
		 * Find the smallest and second smallest column width
		 * among the columns which may need expansion.
		 */

		min1 = min2 = SIZE_MAX;
		for (icol = 0; icol <= maxcol; icol++) {
			width = colwidth[icol];
			if (min1 > width) {
				min2 = min1;
				min1 = width;
			} else if (min1 < width && min2 > width)
				min2 = width;
		}

		/*
		 * Find the minimum wanted width
		 * for any one of the narrowest columns,
		 * and mark the columns wanting that width.
		 */

		wanted = min2;
		for (g = first_group; g != NULL; g = g->next) {
			necol = 0;
			for (icol = g->startcol; icol <= g->endcol; icol++)
				if (colwidth[icol] == min1)
					necol++;
			if (necol == 0)
				continue;
			/*
			 * Distribute the missing width evenly among
			 * the narrowest columns of this group, rounding
			 * up, but do not exceed the second smallest width.
			 */
			width = min1 + (g->wanted - 1) / necol + 1;
			if (width > min2)
				width = min2;
			if (wanted > width)
				wanted = width;
		}

		/* Record the effect of the widening. */

		gp = &first_group;
		while ((g = *gp) != NULL) {
			done = 0;
			for (icol = g->startcol; icol <= g->endcol; icol++) {
				if (colwidth[icol] != min1)
					continue;
				/*
				 * If what is still missing fits into
				 * this step, the group is satisfied.
				 */
				if (g->wanted <= wanted - min1) {
					tbl->cols[icol].width += g->wanted;
					done = 1;
					break;
				}
				tbl->cols[icol].width = wanted;
				g->wanted -= wanted - min1;
			}
			if (done) {
				*gp = g->next;
				free(g);
			} else
				gp = &g->next;
		}
	}
	free(colwidth);

	/*
	 * Align numbers with text.
	 * Count columns to equalize and columns to maximize.
	 * Find maximum width of the columns to equalize.
	 * Find total width of the columns *not* to maximize.
	 */

	necol = nxcol = 0;
	ewidth = xwidth = 0;
	for (icol = 0; icol <= maxcol; icol++) {
		col = tbl->cols + icol;
		/*
		 * If the column is wider than its numbers, move the
		 * decimal position right by half of the difference.
		 */
		if (col->width > col->nwidth)
			col->decimal += (col->width - col->nwidth) / 2;
		if (col->flags & TBL_CELL_EQUAL) {
			necol++;
			if (ewidth < col->width)
				ewidth = col->width;
		}
		if (col->flags & TBL_CELL_WMAX)
			nxcol++;
		else
			xwidth += col->width;
	}

	/*
	 * Equalize columns, if requested for any of them.
	 * Update total width of the columns not to maximize.
	 */

	if (necol) {
		for (icol = 0; icol <= maxcol; icol++) {
			col = tbl->cols + icol;
			if ( ! (col->flags & TBL_CELL_EQUAL))
				continue;
			if (col->width == ewidth)
				continue;
			if (nxcol && rmargin)
				xwidth += ewidth - col->width;
			col->width = ewidth;
		}
	}

	/*
	 * If there are any columns to maximize, find the total
	 * available width, deducting 3n margins between columns.
	 * Distribute the available width evenly.
	 */

	if (nxcol && rmargin) {
		/*
		 * Besides the margins between columns, deduct 2n
		 * for a box, or else 1n for each of the left and
		 * right vertical lines that is present.
		 */
		xwidth += (*tbl->len)(3 * maxcol +
		    (opts->opts & (TBL_OPT_BOX | TBL_OPT_DBOX) ?
		     2 : !!opts->lvert + !!opts->rvert), tbl->arg);
		/* No space is left for expansion. */
		if (rmargin <= offset + xwidth)
			return;
		xwidth = rmargin - offset - xwidth;

		/*
		 * Emulate a bug in GNU tbl width calculation that
		 * manifests itself for large numbers of x-columns.
		 * Emulating it for 5 x-columns gives identical
		 * behaviour for up to 6 x-columns.
		 */

		if (nxcol == 5) {
			quirkcol = xwidth / enw % nxcol + 2;
			if (quirkcol != 3 && quirkcol != 4)
				quirkcol = -1;
		} else
			quirkcol = -1;

		/*
		 * From here on, necol counts the x-columns handled
		 * so far and ewidth sums up the widths assigned to
		 * them.  Each x-column gets its share of the cumulative
		 * target width minus what was already handed out.
		 */

		necol = 0;
		ewidth = 0;
		for (icol = 0; icol <= maxcol; icol++) {
			col = tbl->cols + icol;
			if ( ! (col->flags & TBL_CELL_WMAX))
				continue;
			col->width = (double)xwidth * ++necol / nxcol
			    - ewidth + 0.4995;
			if (necol == quirkcol)
				col->width -= enw;
			ewidth += col->width;
		}
	}
}

/*
 * Determine the width of one data cell by dispatching
 * on the position type of its layout cell.
 * If col is not NULL, that column is widened as needed.
 * mw is handed on to tblcalc_literal() for textual cells.
 * Aborts on an unknown cell type.
 */
static size_t
tblcalc_data(struct rofftbl *tbl, struct roffcol *col,
    const struct tbl_opts *opts, const struct tbl_dat *dp, size_t mw)
{
	size_t		 rulew;	/* Width used for a horizontal line cell. */

	switch (dp->layout->pos) {
	case TBL_CELL_DOWN:
		/* Such a cell has no width of its own. */
		return 0;
	case TBL_CELL_NUMBER:
		return tblcalc_number(tbl, col, opts, dp);
	case TBL_CELL_LEFT:
	case TBL_CELL_RIGHT:
	case TBL_CELL_CENTRE:
	case TBL_CELL_LONG:
		return tblcalc_literal(tbl, col, dp, mw);
	case TBL_CELL_HORIZ:
	case TBL_CELL_DHORIZ:
		break;
	default:
		abort();
	}

	/* Horizontal lines: a width of one unit as reported by len. */
	rulew = (*tbl->len)(1, tbl->arg);
	if (col != NULL && rulew > col->width)
		col->width = rulew;
	return rulew;
}

/*
 * Measure the width of a textual cell.
 * If mw is zero, the whole string is measured as a single line.
 * Otherwise, the string is split at blanks and the words are
 * filled into lines no wider than mw where possible; the result
 * is the width of the longest such line.
 * If col is not NULL, that column is widened to the result.
 * Returns zero for a missing or empty string.
 */
static size_t
tblcalc_literal(struct rofftbl *tbl, struct roffcol *col,
    const struct tbl_dat *dp, size_t mw)
{
	const char	*text;	/* The string, or a private copy of it. */
	const char	*word;	/* Start of the current word. */
	char		*next;	/* Start of the following word. */

	/* Widths in basic units. */
	size_t		 linew;	/* Of the line being filled. */
	size_t		 wordw;	/* Of the current word. */
	size_t		 maxw;	/* Of the widest line so far. */
	size_t		 gapw;	/* Of the blank between two words. */

	if (dp->string == NULL || *dp->string == '\0')
		return 0;

	/* Splitting writes NUL bytes, so work on a copy in that case. */
	text = dp->string;
	if (mw)
		text = mandoc_strdup(dp->string);

	maxw = 0;
	linew = 0;
	word = text;
	while (word != NULL && *word != '\0') {

		/* Terminate the word and find the start of the next. */
		next = mw ? strchr(word, ' ') : NULL;
		if (next != NULL) {
			*next = '\0';
			next++;
			while (*next == ' ')
				next++;
		}

		wordw = (*tbl->slen)(word, tbl->arg);
		gapw = (*tbl->len)(1, tbl->arg);

		/* Start a new line unless the word fits on this one. */
		if (mw == 0 || linew == 0 || linew + gapw + wordw > mw)
			linew = wordw;
		else
			linew += gapw + wordw;

		if (linew > maxw)
			maxw = linew;
		word = next;
	}

	if (mw)
		free((void *)text);

	if (col != NULL && maxw > col->width)
		col->width = maxw;
	return maxw;
}

/*
 * Measure the width of a cell in a numeric column and
 * update the alignment data of the column accordingly.
 * If col is NULL, only the width of the string is returned.
 * Otherwise, col->decimal, col->nwidth, and col->width
 * are enlarged as needed.
 * Returns the width of the cell including any shift to the right
 * needed for alignment, or zero for a missing or empty string.
 */
static size_t
tblcalc_number(struct rofftbl *tbl, struct roffcol *col,
		const struct tbl_opts *opts, const struct tbl_dat *dp)
{
	const char	*p;	/* Scanning position in the string. */
	const char	*digit;	/* The last digit found. */
	const char	*point;	/* The alignment position found. */
	size_t		 fullw;	/* Total width in basic units. */
	size_t		 leadw;	/* Width left of the alignment position. */
	char		 one[2]; /* Single-character string to measure. */

	if (dp->string == NULL || *dp->string == '\0')
		return 0;

	fullw = (*tbl->slen)(dp->string, tbl->arg);
	if (col == NULL)
		return fullw;

	/*
	 * Scan for the last digit and for the last decimal point
	 * that has a digit right before or right after it.
	 * An alignment indicator "\&" ends the scan and wins.
	 */

	digit = NULL;
	point = NULL;
	for (p = dp->string; *p != '\0'; p++) {
		if (p[0] == '\\' && p[1] == '&') {
			digit = point = p;
			break;
		}
		if (p[0] == opts->decimal &&
		    (isdigit((unsigned char)p[1]) ||
		     (p > dp->string && isdigit((unsigned char)p[-1])))) {
			point = p;
			continue;
		}
		if (isdigit((unsigned char)p[0]))
			digit = p;
	}

	/* Without any digit, handle the cell like literal text. */

	if (digit == NULL) {
		if (fullw > col->width)
			col->width = fullw;
		return fullw;
	}

	/*
	 * Without a decimal point, align right after the last digit.
	 * Sum up the widths of the characters before that position,
	 * measuring them one by one.
	 */

	if (point == NULL)
		point = digit + 1;
	one[1] = '\0';
	leadw = 0;
	for (p = dp->string; p < point; p++) {
		one[0] = *p;
		leadw += (*tbl->slen)(one, tbl->arg);
	}

	/*
	 * A number with a narrower or equal leading part than seen
	 * so far is shifted to the right itself.  A wider one moves
	 * the decimal position of the column, which shifts all the
	 * numbers seen earlier to the right.
	 */

	if (leadw <= col->decimal)
		fullw += col->decimal - leadw;
	else {
		col->nwidth += leadw - col->decimal;
		col->decimal = leadw;
	}

	/* Keep track of the widest number and of the column width. */

	if (col->nwidth < fullw)
		col->nwidth = fullw;
	if (col->width < col->nwidth)
		col->width = col->nwidth;
	return fullw;
}