[BACK]Return to term_ascii.c CVS log [TXT][DIR] Up to [cvsweb.bsd.lv] / mandoc

File: [cvsweb.bsd.lv] / mandoc / term_ascii.c (download)

Revision 1.15, Tue May 17 22:32:45 2011 UTC (12 years, 11 months ago) by kristaps
Branch: MAIN
Changes since 1.14: +70 -13 lines

Locale support.  I'm checking this in to clean up fall-out in-tree, but
it looks pretty good.  Basically, the -Tlocale option propogates into
term_ascii.c, where we set locale-specific console call-backs IFF (1)
setlocale() works; (2) locale support is compiled in (see Makefile for
-DUSE_WCHAR); (3) the internal structure of wchar_t maps directly to
Unicode codepoints as defined by __STDC_ISO_10646__; and (4) the console
supports multi-byte characters.

To date, this configuration only supports GNU/Linux.  OpenBSD doesn't
export __STDC_ISO_10646__ although I'm told by stsp@openbsd.org that it
should (it has the correct map).  Apparently FreeBSD is the same way.
NetBSD?  Don't know.  Apple also supports this, but doesn't define the
macro.  Special-casing!

Benchmark: -Tlocale incurs less than 0.2 factor overhead when run
through several thousand manuals when UTF8 output is enabled.  Native
mode (whether directly -Tascii or through no locale or whatever) is
UNCHANGED: the function callbacks are the same as before.

Note.  If the underlying system does NOT support STDC_ISO_10646, there
is a "slow" version possible with iconv or other means of flipping from
a Unicode codepoint to a wchar_t.

/*	$Id: term_ascii.c,v 1.15 2011/05/17 22:32:45 kristaps Exp $ */
/*
 * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <sys/types.h>

#include <assert.h>
#ifdef USE_WCHAR
# include <locale.h>
#endif
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#ifdef USE_WCHAR
# include <wchar.h>
#endif

#include "mandoc.h"
#include "out.h"
#include "term.h"
#include "main.h"

#if ! defined(__STDC_ISO_10646__)
# undef USE_WCHAR
#endif

static	struct termp	 *ascii_init(enum termenc, char *);
static	double		  ascii_hspan(const struct termp *,
				const struct roffsu *);
static	size_t		  ascii_width(const struct termp *, int);
static	void		  ascii_advance(struct termp *, size_t);
static	void		  ascii_begin(struct termp *);
static	void		  ascii_end(struct termp *);
static	void		  ascii_endline(struct termp *);
static	void		  ascii_letter(struct termp *, int);

#ifdef	USE_WCHAR
static	void		  locale_advance(struct termp *, size_t);
static	void		  locale_endline(struct termp *);
static	void		  locale_letter(struct termp *, int);
static	size_t		  locale_width(const struct termp *, int);
#endif

static struct termp *
ascii_init(enum termenc enc, char *outopts)
{
	const char	*toks[2];
	char		*v;
	struct termp	*p;

	p = mandoc_calloc(1, sizeof(struct termp));
	p->enc = enc;

	p->tabwidth = 5;
	p->defrmargin = 78;

	p->begin = ascii_begin;
	p->end = ascii_end;
	p->hspan = ascii_hspan;
	p->type = TERMTYPE_CHAR;

	p->enc = TERMENC_ASCII;
	p->advance = ascii_advance;
	p->endline = ascii_endline;
	p->letter = ascii_letter;
	p->width = ascii_width;

#if defined (USE_WCHAR)
	if (TERMENC_LOCALE == enc)
		if (setlocale(LC_ALL, "") && MB_CUR_MAX > 1) {
			p->enc = enc;
			p->advance = locale_advance;
			p->endline = locale_endline;
			p->letter = locale_letter;
			p->width = locale_width;
		}
#endif

	toks[0] = "width";
	toks[1] = NULL;

	while (outopts && *outopts)
		switch (getsubopt(&outopts, UNCONST(toks), &v)) {
		case (0):
			p->defrmargin = (size_t)atoi(v);
			break;
		default:
			break;
		}

	/* Enforce a lower boundary. */
	if (p->defrmargin < 58)
		p->defrmargin = 58;

	return(p);
}

void *
ascii_alloc(char *outopts)
{

	return(ascii_init(TERMENC_ASCII, outopts));
}

void *
locale_alloc(char *outopts)
{

	return(ascii_init(TERMENC_LOCALE, outopts));
}

/* ARGSUSED */
static size_t
ascii_width(const struct termp *p, int c)
{

	return(1);
}

void
ascii_free(void *arg)
{

	term_free((struct termp *)arg);
}

/* ARGSUSED */
static void
ascii_letter(struct termp *p, int c)
{
	
	putchar(c);
}

static void
ascii_begin(struct termp *p)
{

	(*p->headf)(p, p->argf);
}

static void
ascii_end(struct termp *p)
{

	(*p->footf)(p, p->argf);
}

/* ARGSUSED */
static void
ascii_endline(struct termp *p)
{

	putchar('\n');
}

/* ARGSUSED */
static void
ascii_advance(struct termp *p, size_t len)
{
	size_t	 	i;

	for (i = 0; i < len; i++)
		putchar(' ');
}

/* ARGSUSED */
static double
ascii_hspan(const struct termp *p, const struct roffsu *su)
{
	double		 r;

	/*
	 * Approximate based on character width.  These are generated
	 * entirely by eyeballing the screen, but appear to be correct.
	 */

	switch (su->unit) {
	case (SCALE_CM):
		r = 4 * su->scale;
		break;
	case (SCALE_IN):
		r = 10 * su->scale;
		break;
	case (SCALE_PC):
		r = (10 * su->scale) / 6;
		break;
	case (SCALE_PT):
		r = (10 * su->scale) / 72;
		break;
	case (SCALE_MM):
		r = su->scale / 1000;
		break;
	case (SCALE_VS):
		r = su->scale * 2 - 1;
		break;
	default:
		r = su->scale;
		break;
	}

	return(r);
}

#ifdef USE_WCHAR
/* ARGSUSED */
static size_t
locale_width(const struct termp *p, int c)
{
	int		rc;

	return((rc = wcwidth(c)) < 0 ? 0 : rc);
}

/* ARGSUSED */
static void
locale_advance(struct termp *p, size_t len)
{
	size_t	 	i;

	for (i = 0; i < len; i++)
		putwchar(L' ');
}

/* ARGSUSED */
static void
locale_endline(struct termp *p)
{

	putwchar(L'\n');
}

/* ARGSUSED */
static void
locale_letter(struct termp *p, int c)
{
	
	putwchar(c);
}
#endif