version 1.94, 2014/01/02 20:24:39 |
version 1.97, 2014/01/05 00:29:54 |
|
|
/* $Id$ */ |
/* $Id$ */ |
/* |
/* |
* Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> |
* Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> |
* Copyright (c) 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org> |
* Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org> |
* |
* |
* Permission to use, copy, modify, and distribute this software for any |
* Permission to use, copy, modify, and distribute this software for any |
* purpose with or without fee is hereby granted, provided that the above |
* purpose with or without fee is hereby granted, provided that the above |
|
|
}; |
}; |
|
|
struct str { |
struct str { |
char *utf8; /* key in UTF-8 form */ |
char *rendered; /* key in UTF-8 or ASCII form */ |
const struct mpage *mpage; /* if set, the owning parse */ |
const struct mpage *mpage; /* if set, the owning parse */ |
uint64_t mask; /* bitmask in sequence */ |
uint64_t mask; /* bitmask in sequence */ |
char key[]; /* the string itself */ |
char key[]; /* may contain escape sequences */ |
}; |
}; |
|
|
struct inodev { |
struct inodev { |
Line 165 static void putkeys(const struct mpage *, |
|
Line 165 static void putkeys(const struct mpage *, |
|
const char *, size_t, uint64_t); |
const char *, size_t, uint64_t); |
static void putmdockey(const struct mpage *, |
static void putmdockey(const struct mpage *, |
const struct mdoc_node *, uint64_t); |
const struct mdoc_node *, uint64_t); |
|
static void render_key(struct mchars *, struct str *); |
static void say(const char *, const char *, ...); |
static void say(const char *, const char *, ...); |
static int set_basedir(const char *); |
static int set_basedir(const char *); |
static int treescan(void); |
static int treescan(void); |
static size_t utf8(unsigned int, char [7]); |
static size_t utf8(unsigned int, char [7]); |
static void utf8key(struct mchars *, struct str *); |
|
|
|
static char *progname; |
static char *progname; |
static int use_all; /* use all found files */ |
static int use_all; /* use all found files */ |
static int nodb; /* no database changes */ |
static int nodb; /* no database changes */ |
static int verb; /* print what we're doing */ |
static int verb; /* print what we're doing */ |
static int warnings; /* warn about crap */ |
static int warnings; /* warn about crap */ |
|
static int write_utf8; /* write UTF-8 output; else ASCII */ |
static int exitcode; /* to be returned by main */ |
static int exitcode; /* to be returned by main */ |
static enum op op; /* operational mode */ |
static enum op op; /* operational mode */ |
static char basedir[PATH_MAX]; /* current base directory */ |
static char basedir[PATH_MAX]; /* current base directory */ |
Line 351 main(int argc, char *argv[]) |
|
Line 352 main(int argc, char *argv[]) |
|
path_arg = NULL; |
path_arg = NULL; |
op = OP_DEFAULT; |
op = OP_DEFAULT; |
|
|
while (-1 != (ch = getopt(argc, argv, "aC:d:ntu:vW"))) |
while (-1 != (ch = getopt(argc, argv, "aC:d:nT:tu:vW"))) |
switch (ch) { |
switch (ch) { |
case ('a'): |
case ('a'): |
use_all = 1; |
use_all = 1; |
Line 369 main(int argc, char *argv[]) |
|
Line 370 main(int argc, char *argv[]) |
|
case ('n'): |
case ('n'): |
nodb = 1; |
nodb = 1; |
break; |
break; |
|
case ('T'): |
|
if (strcmp(optarg, "utf8")) { |
|
fprintf(stderr, "-T%s: Unsupported " |
|
"output format\n", optarg); |
|
goto usage; |
|
} |
|
write_utf8 = 1; |
|
break; |
case ('t'): |
case ('t'): |
CHECKOP(op, ch); |
CHECKOP(op, ch); |
dup2(STDOUT_FILENO, STDERR_FILENO); |
dup2(STDOUT_FILENO, STDERR_FILENO); |
|
|
ohash_delete(&mlinks); |
ohash_delete(&mlinks); |
return(exitcode); |
return(exitcode); |
usage: |
usage: |
fprintf(stderr, "usage: %s [-anvW] [-C file]\n" |
fprintf(stderr, "usage: %s [-anvW] [-C file] [-Tutf8]\n" |
" %s [-anvW] dir ...\n" |
" %s [-anvW] [-Tutf8] dir ...\n" |
" %s [-nvW] -d dir [file ...]\n" |
" %s [-nvW] [-Tutf8] -d dir [file ...]\n" |
" %s [-nvW] -u dir [file ...]\n" |
" %s [-nvW] -u dir [file ...]\n" |
" %s -t file ...\n", |
" %s -t file ...\n", |
progname, progname, progname, |
progname, progname, progname, |
Line 1015 mpages_merge(struct mchars *mc, struct mparse *mp, int |
|
Line 1024 mpages_merge(struct mchars *mc, struct mparse *mp, int |
|
mpage->title = |
mpage->title = |
mandoc_strdup(mpage->mlinks->name); |
mandoc_strdup(mpage->mlinks->name); |
} |
} |
|
putkey(mpage, mpage->sec, TYPE_sec); |
|
putkey(mpage, mpage->arch, TYPE_arch); |
|
|
for (mlink = mpage->mlinks; mlink; mlink = mlink->next) |
for (mlink = mpage->mlinks; mlink; mlink = mlink->next) { |
|
if ('\0' != *mlink->dsec) |
|
putkey(mpage, mlink->dsec, TYPE_sec); |
|
if ('\0' != *mlink->fsec) |
|
putkey(mpage, mlink->fsec, TYPE_sec); |
|
putkey(mpage, mlink->arch, TYPE_arch); |
putkey(mpage, mlink->name, TYPE_Nm); |
putkey(mpage, mlink->name, TYPE_Nm); |
|
} |
|
|
if (warnings && !use_all) { |
if (warnings && !use_all) { |
match = 0; |
match = 0; |
Line 1653 utf8(unsigned int cp, char out[7]) |
|
Line 1670 utf8(unsigned int cp, char out[7]) |
|
} |
} |
|
|
/* |
/* |
* Store the UTF-8 version of a key, or alias the pointer if the key has |
* Store the rendered version of a key, or alias the pointer |
* no UTF-8 transcription marks in it. |
* if the key contains no escape sequences. |
*/ |
*/ |
static void |
static void |
utf8key(struct mchars *mc, struct str *key) |
render_key(struct mchars *mc, struct str *key) |
{ |
{ |
size_t sz, bsz, pos; |
size_t sz, bsz, pos; |
char utfbuf[7], res[5]; |
char utfbuf[7], res[5]; |
Line 1666 utf8key(struct mchars *mc, struct str *key) |
|
Line 1683 utf8key(struct mchars *mc, struct str *key) |
|
int len, u; |
int len, u; |
enum mandoc_esc esc; |
enum mandoc_esc esc; |
|
|
assert(NULL == key->utf8); |
assert(NULL == key->rendered); |
|
|
res[0] = '\\'; |
res[0] = '\\'; |
res[1] = '\t'; |
res[1] = '\t'; |
Line 1682 utf8key(struct mchars *mc, struct str *key) |
|
Line 1699 utf8key(struct mchars *mc, struct str *key) |
|
* pointer as ourselves and get out of here. |
* pointer as ourselves and get out of here. |
*/ |
*/ |
if (strcspn(val, res) == bsz) { |
if (strcspn(val, res) == bsz) { |
key->utf8 = key->key; |
key->rendered = key->key; |
return; |
return; |
} |
} |
|
|
Line 1722 utf8key(struct mchars *mc, struct str *key) |
|
Line 1739 utf8key(struct mchars *mc, struct str *key) |
|
* Parse the escape sequence and see if it's a |
* Parse the escape sequence and see if it's a |
* predefined character or special character. |
* predefined character or special character. |
*/ |
*/ |
|
|
esc = mandoc_escape |
esc = mandoc_escape |
((const char **)&val, &seq, &len); |
((const char **)&val, &seq, &len); |
if (ESCAPE_ERROR == esc) |
if (ESCAPE_ERROR == esc) |
break; |
break; |
|
|
if (ESCAPE_SPECIAL != esc) |
if (ESCAPE_SPECIAL != esc) |
continue; |
continue; |
if (0 == (u = mchars_spec2cp(mc, seq, len))) |
|
continue; |
|
|
|
/* |
/* |
* If we have a Unicode codepoint, try to convert that |
* Render the special character |
* to a UTF-8 byte string. |
* as either UTF-8 or ASCII. |
*/ |
*/ |
cpp = utfbuf; |
|
if (0 == (sz = utf8(u, utfbuf))) |
|
continue; |
|
|
|
|
if (write_utf8) { |
|
if (0 == (u = mchars_spec2cp(mc, seq, len))) |
|
continue; |
|
cpp = utfbuf; |
|
if (0 == (sz = utf8(u, utfbuf))) |
|
continue; |
|
sz = strlen(cpp); |
|
} else { |
|
cpp = mchars_spec2str(mc, seq, len, &sz); |
|
if (NULL == cpp) |
|
continue; |
|
if (ASCII_NBRSP == *cpp) { |
|
cpp = " "; |
|
sz = 1; |
|
} |
|
} |
|
|
/* Copy the rendered glyph into the stream. */ |
/* Copy the rendered glyph into the stream. */ |
|
|
sz = strlen(cpp); |
|
bsz += sz; |
bsz += sz; |
|
|
buf = mandoc_realloc(buf, bsz); |
buf = mandoc_realloc(buf, bsz); |
|
|
memcpy(&buf[pos], cpp, sz); |
memcpy(&buf[pos], cpp, sz); |
pos += sz; |
pos += sz; |
} |
} |
|
|
buf[pos] = '\0'; |
buf[pos] = '\0'; |
key->utf8 = buf; |
key->rendered = buf; |
} |
} |
|
|
/* |
/* |
* Flush the current page's terms (and their bits) into the database. |
* Flush the current page's terms (and their bits) into the database. |
* Wrap the entire set of additions in a transaction to make sqlite be a |
* Wrap the entire set of additions in a transaction to make sqlite be a |
* little faster. |
* little faster. |
* Also, UTF-8-encode the description at the last possible moment. |
* Also, handle escape sequences at the last possible moment. |
*/ |
*/ |
static void |
static void |
dbindex(const struct mpage *mpage, struct mchars *mc) |
dbindex(const struct mpage *mpage, struct mchars *mc) |
Line 1782 dbindex(const struct mpage *mpage, struct mchars *mc) |
|
Line 1808 dbindex(const struct mpage *mpage, struct mchars *mc) |
|
key = ohash_find(&strings, |
key = ohash_find(&strings, |
ohash_qlookup(&strings, mpage->desc)); |
ohash_qlookup(&strings, mpage->desc)); |
assert(NULL != key); |
assert(NULL != key); |
if (NULL == key->utf8) |
if (NULL == key->rendered) |
utf8key(mc, key); |
render_key(mc, key); |
desc = key->utf8; |
desc = key->rendered; |
} |
} |
|
|
SQL_EXEC("BEGIN TRANSACTION"); |
SQL_EXEC("BEGIN TRANSACTION"); |
Line 1818 dbindex(const struct mpage *mpage, struct mchars *mc) |
|
Line 1844 dbindex(const struct mpage *mpage, struct mchars *mc) |
|
for (key = ohash_first(&strings, &slot); NULL != key; |
for (key = ohash_first(&strings, &slot); NULL != key; |
key = ohash_next(&strings, &slot)) { |
key = ohash_next(&strings, &slot)) { |
assert(key->mpage == mpage); |
assert(key->mpage == mpage); |
if (NULL == key->utf8) |
if (NULL == key->rendered) |
utf8key(mc, key); |
render_key(mc, key); |
i = 1; |
i = 1; |
SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, key->mask); |
SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, key->mask); |
SQL_BIND_TEXT(stmts[STMT_INSERT_KEY], i, key->utf8); |
SQL_BIND_TEXT(stmts[STMT_INSERT_KEY], i, key->rendered); |
SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, recno); |
SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, recno); |
SQL_STEP(stmts[STMT_INSERT_KEY]); |
SQL_STEP(stmts[STMT_INSERT_KEY]); |
sqlite3_reset(stmts[STMT_INSERT_KEY]); |
sqlite3_reset(stmts[STMT_INSERT_KEY]); |
if (key->utf8 != key->key) |
if (key->rendered != key->key) |
free(key->utf8); |
free(key->rendered); |
free(key); |
free(key); |
} |
} |
|
|