File: [cvsweb.bsd.lv] / mandoc / catman.c (download)
Revision 1.28, Mon Jun 30 11:24:30 2025 UTC (6 days, 5 hours ago) by schwarze
Branch: MAIN
Changes since 1.27: +64 -6 lines
Reuse the existing socket pair as a backchannel from mandocd(8) to catman(8).
This is needed because catman(8) can pass messages to mandocd(8) much
faster than mandocd(8) can process them: to file a new message, catman(8)
does not need to do much more than one call to fts_read(3), whereas
mandocd(8) needs to read, parse, and format the whole file in response.
So unprocessed messages pile up in the kernel, each of them containing
three file descriptors in flight, ultimately resulting in catman(8)
dying with:
catman: FATAL: sendmsg: Too many open files
The catman(8) program cannot find out on its own how far mandocd(8)
has progressed, so mandocd(8) has to tell it.
Each time mandocd(8) accepts a message from catman(8), it now sends
a one-byte message back. When catman(8) has the number of files in
flight that it considers acceptable, it does a blocking recv(3),
such that no more files get sent until at least one has been
accepted. As soon as it gets one message back, catman(8) then
does further non-blocking recv(3)s until they no longer succeed,
to reduce the count of files in flight as much as possible
before returning to sending more files.
When processing is complete or aborted, catman(8) waits for the
remaining files to be accepted before exiting.
Debugging output related to this feature is available with -vv.
|
/* $Id: catman.c,v 1.28 2025/06/30 11:24:30 schwarze Exp $ */
/*
* Copyright (c) 2017, 2025 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2017 Michael Stapelberg <stapelberg@debian.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "config.h"
#if NEED_XPG4_2
#define _XPG4_2
#endif
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <assert.h>
#if HAVE_ERR
#include <err.h>
#endif
#include <errno.h>
#include <fcntl.h>
#if HAVE_FTS
#include <fts.h>
#else
#include "compat_fts.h"
#endif
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
/*
 * Verbosity level, incremented once for each -v option.
 * Level 1 prints a summary when done; level 2 additionally
 * prints debugging output about the number of files in flight.
 */
int			 verbose_flag = 0;

/*
 * Number of the most recently caught signal, or 0 if none arrived yet.
 * Stored by signal_handler() and polled by process_tree().
 * The object has to be volatile: without the qualifier, the compiler
 * is free to cache the value and never notice the asynchronous store.
 */
volatile sig_atomic_t	 got_signal = 0;

int	  process_manpage(int, int, const char *);
int	  process_tree(int, int);
void	  run_mandocd(int, const char *, const char *)
		__attribute__((__noreturn__));
void	  signal_handler(int);
ssize_t	  sock_fd_write(int, int, int, int);
void	  usage(void) __attribute__((__noreturn__));
/*
 * Signal handler shared by all signals this program catches.
 * It only records the signal number in the global got_signal.
 */
void
signal_handler(int signum)
{
	got_signal = signum;
}
/*
 * Replace the current process image with mandocd(8).
 *
 * sockfd   descriptor number passed to mandocd as its last argument
 * outtype  argument of the -T option
 * defos    argument of the -I option, or NULL to omit that option
 *
 * Never returns: if formatting the descriptor number or the exec
 * itself fails, the process exits with status 1 via err(3).
 */
void
run_mandocd(int sockfd, const char *outtype, const char* defos)
{
	char	 fdarg[10];
	int	 n;

	/* Render the descriptor number as a decimal string. */
	n = snprintf(fdarg, sizeof(fdarg), "%d", sockfd);
	if (n >= (int)sizeof(fdarg)) {
		/* The number did not fit; report it as an overflow. */
		errno = EOVERFLOW;
		err(1, "snprintf");
	}
	if (n < 0)
		err(1, "snprintf");

	if (defos != NULL)
		execlp("mandocd", "mandocd", "-T", outtype,
		    "-I", defos, fdarg, (char *)NULL);
	else
		execlp("mandocd", "mandocd", "-T", outtype,
		    fdarg, (char *)NULL);

	/* execlp(3) only returns on failure. */
	err(1, "exec(mandocd)");
}
/*
 * Send one message over the socket fd: a single NUL byte of payload
 * accompanied by the three file descriptors fd0, fd1, and fd2,
 * passed as SCM_RIGHTS ancillary data.
 *
 * Returns the result of sendmsg(2), which is the number of payload
 * bytes sent on success.  On failure other than EAGAIN, a warning
 * is printed and -1 is returned.
 */
ssize_t
sock_fd_write(int fd, int fd0, int fd1, int fd2)
{
	const struct timespec timeout = { 0, 10000000 };	/* 0.01 s */
	struct msghdr	 msg;
	struct iovec	 iov;
	union {
		struct cmsghdr	 cmsghdr;	/* Forces proper alignment. */
		char		 control[CMSG_SPACE(3 * sizeof(int))];
	} cmsgu;
	struct cmsghdr	*cmsg;
	int		 fds[3];
	ssize_t		 sz;
	unsigned char	 dummy[1] = {'\0'};

	/*
	 * Start from all zeros such that neither unused members
	 * of the message header (for example msg_flags) nor padding
	 * bytes in the control buffer are handed to the kernel
	 * in an uninitialized state.
	 */
	memset(&msg, 0, sizeof(msg));
	memset(&cmsgu, 0, sizeof(cmsgu));

	iov.iov_base = dummy;
	iov.iov_len = sizeof(dummy);

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cmsgu.control;
	msg.msg_controllen = sizeof(cmsgu.control);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_len = CMSG_LEN(3 * sizeof(int));
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;

	/*
	 * Copy the descriptors with memcpy(3) rather than storing
	 * through an int pointer: cmsg(3) does not promise that
	 * CMSG_DATA() is suitably aligned for arbitrary payload types.
	 */
	fds[0] = fd0;
	fds[1] = fd1;
	fds[2] = fd2;
	memcpy(CMSG_DATA(cmsg), fds, sizeof(fds));

	/*
	 * It appears that on some systems, sendmsg(3)
	 * may return EAGAIN even in blocking mode.
	 * Seen for example on Oracle Solaris 11.2.
	 * The sleeping time was chosen by experimentation,
	 * to neither cause more than a handful of retries
	 * in normal operation nor unnecessary delays.
	 */
	while ((sz = sendmsg(fd, &msg, 0)) == -1) {
		if (errno != EAGAIN) {
			warn("FATAL: sendmsg");
			break;
		}
		nanosleep(&timeout, NULL);
	}
	return sz;
}
/*
 * Hand one manual page over to the server listening on srv_fd.
 *
 * The file at path is opened for reading relative to the current
 * working directory, and a file of the same relative name is created
 * or truncated for writing below dstdir_fd.  Both descriptors are
 * passed to the server together with standard error output and
 * are then closed locally.
 *
 * Returns 0 if either file cannot be opened (a warning is printed
 * and nothing is sent), or otherwise the result of sock_fd_write():
 * -1 on failure, with errno describing the send failure,
 * or a positive number on success.
 */
int
process_manpage(int srv_fd, int dstdir_fd, const char *path)
{
	int	 in_fd, out_fd;
	int	 irc, saved_errno;

	if ((in_fd = open(path, O_RDONLY)) == -1) {
		warn("open %s for reading", path);
		fflush(stderr);
		return 0;
	}

	if ((out_fd = openat(dstdir_fd, path,
	    O_WRONLY | O_NOFOLLOW | O_CREAT | O_TRUNC,
	    S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) == -1) {
		warn("openat %s for writing", path);
		fflush(stderr);
		close(in_fd);
		return 0;
	}

	irc = sock_fd_write(srv_fd, in_fd, out_fd, STDERR_FILENO);

	/*
	 * The caller inspects errno when -1 is returned,
	 * so do not let close(2) overwrite it.
	 */
	saved_errno = errno;
	close(in_fd);
	close(out_fd);
	errno = saved_errno;

	return irc;
}
/*
 * Walk the file hierarchy below the current working directory,
 * replicate its directory structure below dstdir_fd, and pass
 * every regular file to the server on srv_fd by calling
 * process_manpage().
 *
 * The server sends back one byte for each message it has accepted.
 * The counter "inflight" tracks messages sent but not yet accepted;
 * once it reaches max_inflight, no more files are sent until at
 * least one byte has been received.  Before returning, the function
 * waits for all remaining messages to be accepted, unless processing
 * was interrupted by a signal.
 *
 * Returns 0 on success or -1 if fts_open(3) failed, a signal was
 * caught, a fatal error occurred, or waiting for the server failed.
 */
int
process_tree(int srv_fd, int dstdir_fd)
{
	const struct timespec timeout = { 0, 10000000 };	/* 0.01 s */
	const int	 max_inflight = 16;

	FTS		*ftsp;
	FTSENT		*entry;
	const char	*argv[2];
	const char	*path;
	int		 inflight, irc, decr, fatal, rcv_errno;
	int		 gooddirs, baddirs, goodfiles, badfiles;
	char		 dummy[1];

	argv[0] = ".";
	argv[1] = (char *)NULL;

	if ((ftsp = fts_open((char * const *)argv,
	    FTS_PHYSICAL | FTS_NOCHDIR, NULL)) == NULL) {
		warn("fts_open");
		return -1;
	}

	if (verbose_flag >= 2) {
		warnx("allowing up to %d files in flight", max_inflight);
		fflush(stderr);
	}

	inflight = fatal = gooddirs = baddirs = goodfiles = badfiles = 0;
	while (fatal == 0 && got_signal == 0 &&
	    (entry = fts_read(ftsp)) != NULL) {
		if (inflight >= max_inflight) {

			/*
			 * Too many files in flight: block until
			 * the server has accepted at least one.
			 * A return value of 0 means that the server
			 * closed the connection, which must not be
			 * mistaken for an acknowledgement.
			 */

			while ((irc = recv(srv_fd, dummy,
			    sizeof(dummy), 0)) <= 0) {
				if (irc == 0)
					errno = ECONNRESET;
				if (errno != EAGAIN) {
					fatal = errno;
					warn("FATAL: recv");
					break;
				}
				nanosleep(&timeout, NULL);
			}
			if (fatal != 0)
				break;

			/*
			 * Collect all further acknowledgements
			 * that are already available, without blocking.
			 */

			decr = 1;
			while ((irc = recv(srv_fd, dummy, sizeof(dummy),
			    MSG_DONTWAIT)) > 0)
				decr++;

			/*
			 * Remember why the loop ended right away: the
			 * diagnostic output below may change errno.
			 */

			rcv_errno = irc == 0 ? ECONNRESET : errno;

			assert(inflight >= decr);
			if (verbose_flag >= 2 && decr > 1) {
				warnx("files in flight: %d - %d = %d",
				    inflight, decr, inflight - decr);
				fflush(stderr);
			}
			inflight -= decr;

			/* Anything but "nothing more to read" is fatal. */

			if (rcv_errno != EAGAIN) {
				errno = rcv_errno;
				fatal = rcv_errno;
				warn("FATAL: recv");
				break;
			}
		}

		/*
		 * Strip the leading "./" to get a path relative to
		 * the root of the tree.  The root entry itself is
		 * just ".", which is shorter than two bytes, so do
		 * not step past its terminating NUL byte.
		 */

		path = entry->fts_pathlen > 2 ? entry->fts_path + 2 : "";

		switch (entry->fts_info) {
		case FTS_F:
			switch (process_manpage(srv_fd, dstdir_fd, path)) {
			case -1:
				fatal = errno;
				break;
			case 0:
				badfiles++;
				break;
			default:
				goodfiles++;
				inflight++;
				break;
			}
			break;
		case FTS_D:
			/* The root directory needs no counterpart. */
			if (*path != '\0' &&
			    mkdirat(dstdir_fd, path, S_IRWXU | S_IRGRP |
			      S_IXGRP | S_IROTH | S_IXOTH) == -1 &&
			    errno != EEXIST) {
				warn("mkdirat %s", path);
				fflush(stderr);
				(void)fts_set(ftsp, entry, FTS_SKIP);
				baddirs++;
			} else
				gooddirs++;
			break;
		case FTS_DP:
			break;
		case FTS_DNR:
			warnx("directory %s unreadable: %s",
			    path, strerror(entry->fts_errno));
			fflush(stderr);
			baddirs++;
			break;
		case FTS_DC:
			warnx("directory %s causes cycle", path);
			fflush(stderr);
			baddirs++;
			break;
		case FTS_ERR:
		case FTS_NS:
			warnx("file %s: %s",
			    path, strerror(entry->fts_errno));
			fflush(stderr);
			badfiles++;
			break;
		default:
			warnx("file %s: not a regular file", path);
			fflush(stderr);
			badfiles++;
			break;
		}
	}

	if (got_signal != 0) {
		switch (got_signal) {
		case SIGCHLD:
			warnx("FATAL: mandocd child died: got SIGCHLD");
			break;
		case SIGPIPE:
			warnx("FATAL: mandocd child died: got SIGPIPE");
			break;
		default:
			warnx("FATAL: signal SIG%s", sys_signame[got_signal]);
			break;
		}
		/* Do not wait for a server that may be gone. */
		inflight = -1;
		fatal = 1;
	} else if (fatal == 0 && (fatal = errno) != 0)
		/* fts_read(3) returned NULL with errno set. */
		warn("FATAL: fts_read");

	fts_close(ftsp);

	/* Wait for the server to accept whatever is still in flight. */

	if (verbose_flag >= 2 && inflight > 0) {
		warnx("waiting for %d files in flight", inflight);
		fflush(stderr);
	}
	while (inflight > 0) {
		irc = recv(srv_fd, dummy, sizeof(dummy), 0);
		if (irc > 0)
			inflight--;
		else if (irc == -1 && errno == EAGAIN)
			nanosleep(&timeout, NULL);
		else {
			if (irc == 0)
				errno = ECONNRESET;
			warn("recv");
			inflight = -1;
		}
	}

	if (verbose_flag)
		warnx("processed %d files in %d directories",
		    goodfiles, gooddirs);
	if (baddirs > 0)
		warnx("skipped %d %s due to errors", baddirs,
		    baddirs == 1 ? "directory" : "directories");
	if (badfiles > 0)
		warnx("skipped %d %s due to errors", badfiles,
		    badfiles == 1 ? "file" : "files");
	if (fatal != 0) {
		warnx("processing aborted due to fatal error, "
		    "results are probably incomplete");
		inflight = -1;
	}
	return inflight;
}
/*
 * Entry point: parse the command line, install the signal handlers,
 * start mandocd(8) in a child process connected by a socket pair,
 * and process the tree below srcdir (argv[0] after option parsing),
 * writing the results below dstdir (argv[1]).
 *
 * Exits with status 1 if process_tree() returns -1 or if any
 * setup step fails, and with status 0 otherwise.
 */
int
main(int argc, char **argv)
{
	/* Signals to catch, in the order they get installed. */
	static const struct {
		int		 signo;
		const char	*what;
	} caught[] = {
		{ SIGHUP,	"sigaction(SIGHUP)" },
		{ SIGINT,	"sigaction(SIGINT)" },
		{ SIGPIPE,	"sigaction(SIGPIPE)" },
		{ SIGTERM,	"sigaction(SIGTERM)" },
		{ SIGCHLD,	"sigaction(SIGCHLD)" },
	};

	struct sigaction sa;
	const char	*defos = NULL;
	const char	*outtype = "ascii";
	size_t		 i;
	int		 srv_fds[2];
	int		 dstdir_fd;
	int		 opt;
	pid_t		 pid;

	while ((opt = getopt(argc, argv, "I:T:v")) != -1) {
		if (opt == 'I')
			defos = optarg;
		else if (opt == 'T')
			outtype = optarg;
		else if (opt == 'v')
			verbose_flag++;
		else
			usage();
	}

	if (argc > 0) {
		argc -= optind;
		argv += optind;
	}

	/* Exactly two operands are required: srcdir and dstdir. */
	if (argc != 2) {
		if (argc == 0)
			warnx("missing arguments: srcdir and dstdir");
		else if (argc == 1)
			warnx("missing argument: dstdir");
		else
			warnx("too many arguments: %s", argv[2]);
		usage();
	}

	/* All handlers share one action with every signal masked. */
	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = &signal_handler;
	sa.sa_flags = SA_NOCLDWAIT;
	if (sigfillset(&sa.sa_mask) == -1)
		err(1, "sigfillset");
	for (i = 0; i < sizeof(caught) / sizeof(caught[0]); i++)
		if (sigaction(caught[i].signo, &sa, NULL) == -1)
			err(1, "%s", caught[i].what);

	if (socketpair(AF_LOCAL, SOCK_STREAM, AF_UNSPEC, srv_fds) == -1)
		err(1, "socketpair");

	pid = fork();
	if (pid == -1)
		err(1, "fork");
	if (pid == 0) {
		/* Child: keep only its own end; this never returns. */
		close(srv_fds[0]);
		run_mandocd(srv_fds[1], outtype, defos);
	}

	/* Parent: the child's end of the socket pair is not needed. */
	close(srv_fds[1]);

	if ((dstdir_fd = open(argv[1], O_RDONLY | O_DIRECTORY)) == -1)
		err(1, "open destination %s", argv[1]);

	if (chdir(argv[0]) == -1)
		err(1, "chdir to source %s", argv[0]);

	if (process_tree(srv_fds[0], dstdir_fd) == -1)
		return 1;
	return 0;
}
/*
 * Print a one-line synopsis to standard error output
 * and exit with status 1.
 */
void
usage(void)
{
	fprintf(stderr,
	    "usage: %s [-I os=name] [-T output] srcdir dstdir\n",
	    BINM_CATMAN);
	exit(1);
}