===================================================================
RCS file: /cvs/mandoc/mandocdb.c,v
retrieving revision 1.266
retrieving revision 1.274
diff -u -p -r1.266 -r1.274
--- mandoc/mandocdb.c	2020/01/26 21:25:41	1.266
+++ mandoc/mandocdb.c	2024/05/14 21:19:12	1.274
@@ -1,7 +1,7 @@
-/* $Id: mandocdb.c,v 1.266 2020/01/26 21:25:41 schwarze Exp $ */
+/* $Id: mandocdb.c,v 1.274 2024/05/14 21:19:12 schwarze Exp $ */
 /*
+ * Copyright (c) 2011-2021, 2024 Ingo Schwarze
  * Copyright (c) 2011, 2012 Kristaps Dzonsons
- * Copyright (c) 2011-2020 Ingo Schwarze
  * Copyright (c) 2016 Ed Maste
  *
  * Permission to use, copy, modify, and distribute this software for any
@@ -15,6 +15,8 @@
  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Implementation of the makewhatis(8) program.
  */
 #include "config.h"
 
@@ -118,7 +120,7 @@ struct mdoc_handler {
 int mandocdb(int, char *[]);
 
 static void dbadd(struct dba *, struct mpage *);
-static void dbadd_mlink(const struct mlink *mlink);
+static void dbadd_mlink(const struct mlink *);
 static void dbprune(struct dba *);
 static void dbwrite(struct dba *);
 static void filescan(const char *);
@@ -163,12 +165,15 @@ static void putkey(const struct mpage *, char *, uint
 static void putkeys(const struct mpage *, char *, size_t, uint64_t);
 static void putmdockey(const struct mpage *,
 			const struct roff_node *, uint64_t, int);
+#ifdef READ_ALLOWED_PATH
+static int read_allowed(const char *);
+#endif
 static int render_string(char **, size_t *);
 static void say(const char *, const char *, ...)
 			__attribute__((__format__ (__printf__, 2, 3)));
 static int set_basedir(const char *, int);
 static int treescan(void);
-static size_t utf8(unsigned int, char [7]);
+static size_t utf8(unsigned int, char[5]);
 
 static int nodb; /* no database changes */
 static int mparse_options; /* abort the parse early */
@@ -348,7 +353,7 @@ mandocdb(int argc, char *argv[])
 		goto usage; \
 	} while (/*CONSTCOND*/0)
 
-	mparse_options = MPARSE_VALIDATE;
+	mparse_options = MPARSE_UTF8 | MPARSE_LATIN1 | MPARSE_VALIDATE;
 	path_arg = NULL;
 	op = OP_DEFAULT;
 
@@ -527,6 +532,9 @@ out:
 	mpages_free();
 	ohash_delete(&mpages);
 	ohash_delete(&mlinks);
+#if DEBUG_MEMORY
+	mandoc_dbg_finish();
+#endif
 	return exitcode;
 usage:
 	progname = getprogname();
@@ -610,8 +618,8 @@ treescan(void)
 				continue;
 			}
 			if (strncmp(buf, basedir, basedir_len) != 0
-#ifdef HOMEBREWDIR
-			    && strncmp(buf, HOMEBREWDIR, strlen(HOMEBREWDIR))
+#ifdef READ_ALLOWED_PATH
+			    && !read_allowed(buf)
 #endif
 			) {
 				if (warnings) say("",
@@ -624,6 +632,8 @@ treescan(void)
 					say(path, "&stat");
 				continue;
 			}
+			if ((ff->fts_statp->st_mode & S_IFMT) != S_IFREG)
+				continue;
 			/* FALLTHROUGH */
 
 		/*
@@ -794,7 +804,7 @@ filescan(const char *infile)
 	 * We have to do lstat(2) before realpath(3) loses
 	 * the information whether this is a symbolic link.
 	 * We need to know that because for symbolic links,
-	 * we want to use the orginal file name, while for
+	 * we want to use the original file name, while for
 	 * regular files, we want to use the real path.
 	 */
 	if (lstat(infile, &st) == -1) {
@@ -821,8 +831,8 @@ filescan(const char *infile)
 		start = usefile;
 	else if (strncmp(usefile, basedir, basedir_len) == 0)
 		start = usefile + basedir_len;
-#ifdef HOMEBREWDIR
-	else if (strncmp(usefile, HOMEBREWDIR, strlen(HOMEBREWDIR)) == 0)
+#ifdef READ_ALLOWED_PATH
+	else if (read_allowed(usefile))
 		start = usefile;
 #endif
 	else {
@@ -1897,49 +1907,35 @@ putkeys(const struct mpage *mpage, char *cp, size_t sz
  * Take a Unicode codepoint and produce its UTF-8 encoding.
  * This isn't the best way to do this, but it works.
  * The magic numbers are from the UTF-8 packaging.
- * They're not as scary as they seem: read the UTF-8 spec for details.
+ * Read the UTF-8 spec or the utf8(7) manual page for details.
  */
 static size_t
-utf8(unsigned int cp, char out[7])
+utf8(unsigned int cp, char out[5])
 {
 	size_t rc;
 
-	rc = 0;
-	if (cp <= 0x0000007F) {
+	if (cp <= 0x7f) {
 		rc = 1;
 		out[0] = (char)cp;
-	} else if (cp <= 0x000007FF) {
+	} else if (cp <= 0x7ff) {
 		rc = 2;
 		out[0] = (cp >> 6 & 31) | 192;
 		out[1] = (cp & 63) | 128;
-	} else if (cp <= 0x0000FFFF) {
+	} else if (cp >= 0xd800 && cp <= 0xdfff) {
+		rc = 0; /* reject UTF-16 surrogate */
+	} else if (cp <= 0xffff) {
 		rc = 3;
 		out[0] = (cp >> 12 & 15) | 224;
 		out[1] = (cp >> 6 & 63) | 128;
 		out[2] = (cp & 63) | 128;
-	} else if (cp <= 0x001FFFFF) {
+	} else if (cp <= 0x10ffff) {
 		rc = 4;
 		out[0] = (cp >> 18 & 7) | 240;
 		out[1] = (cp >> 12 & 63) | 128;
 		out[2] = (cp >> 6 & 63) | 128;
 		out[3] = (cp & 63) | 128;
-	} else if (cp <= 0x03FFFFFF) {
-		rc = 5;
-		out[0] = (cp >> 24 & 3) | 248;
-		out[1] = (cp >> 18 & 63) | 128;
-		out[2] = (cp >> 12 & 63) | 128;
-		out[3] = (cp >> 6 & 63) | 128;
-		out[4] = (cp & 63) | 128;
-	} else if (cp <= 0x7FFFFFFF) {
-		rc = 6;
-		out[0] = (cp >> 30 & 1) | 252;
-		out[1] = (cp >> 24 & 63) | 128;
-		out[2] = (cp >> 18 & 63) | 128;
-		out[3] = (cp >> 12 & 63) | 128;
-		out[4] = (cp >> 6 & 63) | 128;
-		out[5] = (cp & 63) | 128;
 	} else
-		return 0;
+		rc = 0;
 
 	out[rc] = '\0';
 	return rc;
@@ -2021,7 +2017,21 @@ render_string(char **public, size_t *psz)
 		 */
 
 		scp++;
-		if (mandoc_escape(&scp, &seq, &seqlen) != ESCAPE_SPECIAL)
+		switch (mandoc_escape(&scp, &seq, &seqlen)) {
+		case ESCAPE_UNICODE:
+			unicode = mchars_num2uc(seq + 1, seqlen - 1);
+			break;
+		case ESCAPE_NUMBERED:
+			unicode = mchars_num2char(seq, seqlen);
+			break;
+		case ESCAPE_SPECIAL:
+			unicode = mchars_spec2cp(seq, seqlen);
+			break;
+		default:
+			unicode = -1;
+			break;
+		}
+		if (unicode <= 0)
 			continue;
 
 		/*
@@ -2030,21 +2040,17 @@ render_string(char **public, size_t *psz)
 		 */
 
 		if (write_utf8) {
-			unicode = mchars_spec2cp(seq, seqlen);
-			if (unicode <= 0)
-				continue;
 			addsz = utf8(unicode, utfbuf);
 			if (addsz == 0)
 				continue;
 			addcp = utfbuf;
 		} else {
-			addcp = mchars_spec2str(seq, seqlen, &addsz);
+			addcp = mchars_uc2str(unicode);
 			if (addcp == NULL)
 				continue;
-			if (*addcp == ASCII_NBRSP) {
+			if (*addcp == ASCII_NBRSP)
 				addcp = " ";
-				addsz = 1;
-			}
+			addsz = strlen(addcp);
 		}
 
 		/* Copy the rendered glyph into the stream. */
@@ -2244,11 +2250,11 @@ dbwrite(struct dba *dba)
 			say(tfn, "&dba_write");
 			goto err;
 		}
-		if ((fd1 = open(MANDOC_DB, O_RDONLY, 0)) == -1) {
+		if ((fd1 = open(MANDOC_DB, O_RDONLY)) == -1) {
 			say(MANDOC_DB, "&open");
 			goto err;
 		}
-		if ((fd2 = open(tfn, O_RDONLY, 0)) == -1) {
+		if ((fd2 = open(tfn, O_RDONLY)) == -1) {
 			say(tfn, "&open");
 			goto err;
 		}
@@ -2378,6 +2384,25 @@ set_basedir(const char *targetdir, int report_baddir)
 	}
 	return 1;
 }
+
+#ifdef READ_ALLOWED_PATH
+static int
+read_allowed(const char *candidate)
+{
+	const char *cp;
+	size_t len;
+
+	for (cp = READ_ALLOWED_PATH;; cp += len) {
+		while (*cp == ':')
+			cp++;
+		if (*cp == '\0')
+			return 0;
+		len = strcspn(cp, ":");
+		if (strncmp(candidate, cp, len) == 0)
+			return 1;
+	}
+}
+#endif
 
 static void
 say(const char *file, const char *format, ...)