mandoc/chars.c - diff

Return to chars.c CVS log

Up to [cvsweb.bsd.lv] / mandoc

Diff for /mandoc/chars.c between version 1.41 and 1.61

version 1.41, 2011/05/14 17:54:42

version 1.61, 2014/10/26 18:07:28

Line 1

/* $Id$ */

* Permission to use, copy, modify, and distribute this software for any

* purpose with or without fee is hereby granted, provided that the above

Line 15

* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF

* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

#ifdef HAVE_CONFIG_H

#include "config.h"

#endif

#include <sys/types.h>

#include <assert.h>

#include <ctype.h>

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include "mandoc.h"

#include "mandoc_aux.h"

#include "libmandoc.h"

#define PRINT_HI 126

#define PRINT_LO 32

Line 35 struct ln {

Line 36 struct ln {

const char *code;

const char *ascii;

int unicode;

int type;

#define CHARS_CHAR (1 << 0)

#define CHARS_STRING (1 << 1)

#define CHARS_BOTH (CHARS_CHAR | CHARS_STRING)

};

#define LINES_MAX 353

#define LINES_MAX 330

#define CHAR(in, ch, code) \

{ NULL, (in), (ch), (code), CHARS_CHAR },

{ NULL, (in), (ch), (code) },

#define STRING(in, ch, code) \

{ NULL, (in), (ch), (code), CHARS_STRING },

#define BOTH(in, ch, code) \

{ NULL, (in), (ch), (code), CHARS_BOTH },

#define CHAR_TBL_START static struct ln lines[LINES_MAX] = {

#define CHAR_TBL_END };

Line 59 struct mchars {

Line 52 struct mchars {

struct ln **htab;

};

static inline int match(const struct ln *,

static const struct ln *find(const struct mchars *,

const char *, size_t, int);

const char *, size_t);

static const struct ln *find(struct mchars *, const char *, size_t, int);

void

mchars_free(struct mchars *arg)

{

Line 82 mchars_alloc(void)

Line 75 mchars_alloc(void)

* Constructs a very basic chaining hashtable. The hash routine

* is simply the integral value of the first character.

* Subsequent entries are chained in the order they're processed

* Subsequent entries are chained in the order they're processed.

* (they're in-line re-ordered during lookup).

tab = mandoc_malloc(sizeof(struct mchars));

htab = mandoc_calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln **));

htab = mandoc_calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln *));

for (i = 0; i < LINES_MAX; i++) {

hash = (int)lines[i].code[0] - PRINT_LO;

Line 106 mchars_alloc(void)

Line 98 mchars_alloc(void)

return(tab);

}

* Special character to Unicode codepoint.

int

mchars_spec2cp(struct mchars *arg, const char *p, size_t sz)

mchars_spec2cp(const struct mchars *arg, const char *p, size_t sz)

{

const struct ln *ln;

ln = find(arg, p, sz, CHARS_CHAR);

ln = find(arg, p, sz);

if (NULL == ln)

return(ln != NULL ? ln->unicode : sz == 1 ? *p : -1);

return(-1);

return(ln->unicode);

}

char

mchars_num2char(const char *p, size_t sz)

* Reserved word to Unicode codepoint.

int

mchars_res2cp(struct mchars *arg, const char *p, size_t sz)

{

const struct ln *ln;

int i;

ln = find(arg, p, sz, CHARS_STRING);

if ((i = mandoc_strntoi(p, sz, 10)) < 0)

if (NULL == ln)

return('\0');

return(-1);

return(ln->unicode);

return(i > 0 && i < 256 && isprint(i) ? i : '\0');

}

int

* Numbered character to literal character.

mchars_num2uc(const char *p, size_t sz)

* This can only be a printable character (i.e., alnum, punct, space) so

* prevent the character from ruining our state (backspace, newline, and

* so on).

char

mchars_num2char(const char *p, size_t sz)

{

int i;

if (sz > 3)

if ((i = mandoc_strntoi(p, sz, 16)) < 0)

return('\0');

return(0xFFFD);

i = atoi(p);

/* LINTED */

* XXX Code is missing here to exclude bogus ranges.

return(isprint(i) ? i : '\0');

return(i <= 0x10FFFF ? i : 0xFFFD);

}

* Special character to string array.

const char *

mchars_spec2str(struct mchars *arg, const char *p, size_t sz, size_t *rsz)

mchars_spec2str(const struct mchars *arg,

const char *p, size_t sz, size_t *rsz)

{

const struct ln *ln;

ln = find(arg, p, sz, CHARS_CHAR);

ln = find(arg, p, sz);

if (NULL == ln)

if (ln == NULL) {

return(NULL);

*rsz = 1;

return(sz == 1 ? p : NULL);

}

*rsz = strlen(ln->ascii);

return(ln->ascii);

}

* Reserved word to string array.

const char *

mchars_res2str(struct mchars *arg, const char *p, size_t sz, size_t *rsz)

mchars_uc2str(int uc)

{

const struct ln *ln;

int i;

ln = find(arg, p, sz, CHARS_STRING);

for (i = 0; i < LINES_MAX; i++)

if (NULL == ln)

if (uc == lines[i].unicode)

return(NULL);

return(lines[i].ascii);

return("<?>");

*rsz = strlen(ln->ascii);

return(ln->ascii);

}

static const struct ln *

find(struct mchars *tab, const char *p, size_t sz, int type)

find(const struct mchars *tab, const char *p, size_t sz)

{

struct ln *pp, *prev;

const struct ln *pp;

struct ln **htab;

int hash;

assert(p);

if (0 == sz)

return(NULL);

if (p[0] < PRINT_LO || p[0] > PRINT_HI)

if (0 == sz || p[0] < PRINT_LO || p[0] > PRINT_HI)

return(NULL);

* Look up the symbol in the symbol hash. See ascii2htab for the

* hashtable specs. This dynamically re-orders the hash chain

* to optimise for repeat hits.

hash = (int)p[0] - PRINT_LO;

htab = tab->htab;

if (NULL == (pp = htab[hash]))

for (pp = tab->htab[hash]; pp; pp = pp->next)

return(NULL);

if (0 == strncmp(pp->code, p, sz) &&

'\0' == pp->code[(int)sz])

return(pp);

for (prev = NULL; pp; pp = pp->next) {

if ( ! match(pp, p, sz, type)) {

prev = pp;

continue;

}

if (prev) {

prev->next = pp->next;

pp->next = htab[hash];

htab[hash] = pp;

}

return(pp);

}

return(NULL);

}

static inline int

match(const struct ln *ln, const char *p, size_t sz, int type)

{

if ( ! (ln->type & type))

return(0);

if (strncmp(ln->code, p, sz))

return(0);

return('\0' == ln->code[(int)sz]);

}

CVSweb