diff options
author | Reepca Russelstein <reepca@russelstein.xyz> | 2025-04-18 01:35:31 -0500 |
---|---|---|
committer | John Kehayias <john.kehayias@protonmail.com> | 2025-06-24 10:07:57 -0400 |
commit | fb42611b8f27960304db5a1c0d33b8371dcde2a8 (patch) | |
tree | e4331b4b340c3304684914044d543ecd0e653fb7 | |
parent | be8aca065118aa4485c02f991c51bea89034defa (diff) |
daemon: Use slirp4netns to provide networking to fixed-output derivations.
Previously, the builder of a fixed-output derivation could communicate with an
external process via an abstract Unix-domain socket. In particular, it could
send that process an open file descriptor referring to a file in the store,
granting it write access to some of the builder's output files in the store
provided the derivation build fails—the fix for CVE-2024-27297 did not address
this specific case. It could also send an open file descriptor referring to a
setuid program, which the recipient could then execute using execveat to gain
the privileges of the build user.
With this change, fixed-output derivations other than “builtin:download”
and “builtin:git-download” always run in a separate network namespace
and have network access provided by a TAP device backed by slirp4netns,
thereby closing the abstract Unix-domain socket channel.
* nix/libstore/globals.hh (Settings)[useHostLoopback, slirp4netns]: new
fields.
* config-daemon.ac (SLIRP4NETNS): new C preprocessor definition.
* nix/libstore/globals.cc (Settings::Settings): initialize ‘useHostLoopback’
and ‘slirp4netns’ to their defaults.
* nix/nix-daemon/guix-daemon.cc (options): add --isolate-host-loopback option.
* doc/guix.texi: document it.
* nix/libstore/build.cc (DerivationGoal)[slirp]: New field.
(setupTap, setupTapAction, waitForSlirpReadyAction, enableRouteLocalnetAction,
prepareSlirpChrootAction, spawnSlirp4netns, haveGlobalIPv6Address,
remapIdsTo0Action): New functions.
(initializeUserNamespace): allow the guest UID and GID to be specified.
(DerivationGoal::killChild): When ‘slirp’ is not -1, call ‘kill’ on it to
terminate the slirp4netns process.
(DerivationGoal::startBuilder): Unconditionally add CLONE_NEWNET to FLAGS.
When ‘fixedOutput’ and ‘useChroot’ are true and both slirp4netns and
/dev/net/tun are available, spawn ‘slirp4netns’.
When ‘fixedOutput’ and ‘useChroot’ are true, add setupTapAction,
waitForSlirpReadyAction, and enableRouteLocalnetAction to builder setup
phases.
Create an /etc/resolv.conf for fixed-output derivations that directs them to
slirp4netns's DNS address (10.0.2.3).
When settings.useHostLoopback is true, supply fixed-output derivations with a
/etc/hosts that resolves "localhost" to slirp4netns's address for accessing
the host loopback.
* nix/libutil/util.cc (keepOnExec, decodeOctalEscaped, sendFD, receiveFD,
findProgram): New functions.
* nix/libutil/util.hh (keepOnExec, decodeOctalEscaped, sendFD, receiveFD,
findProgram): New declarations.
* gnu/packages/package-management.scm (guix): add slirp4netns input for linux
targets.
* tests/derivations.scm (builder-network-isolated?): new variable.
("fixed-output derivation, network access, localhost", "fixed-output
derivation, network access, external host"):
skip these test cases if fixed-output derivations are isolated from the network.
Change-Id: Ia3fea2ab7add56df66800071cf15cdafe7bfab96
Signed-off-by: John Kehayias <john.kehayias@protonmail.com>
-rw-r--r-- | config-daemon.ac | 13 | ||||
-rw-r--r-- | doc/guix.texi | 6 | ||||
-rw-r--r-- | gnu/packages/package-management.scm | 4 | ||||
-rw-r--r-- | nix/libstore/build.cc | 551 | ||||
-rw-r--r-- | nix/libstore/globals.cc | 2 | ||||
-rw-r--r-- | nix/libstore/globals.hh | 9 | ||||
-rw-r--r-- | nix/libutil/util.cc | 101 | ||||
-rw-r--r-- | nix/libutil/util.hh | 16 | ||||
-rw-r--r-- | nix/nix-daemon/guix-daemon.cc | 6 | ||||
-rw-r--r-- | tests/derivations.scm | 17 |
10 files changed, 704 insertions, 21 deletions
diff --git a/config-daemon.ac b/config-daemon.ac index 35d9c8cd56..fe73b893ec 100644 --- a/config-daemon.ac +++ b/config-daemon.ac @@ -139,6 +139,19 @@ if test "x$guix_build_daemon" = "xyes"; then AC_SUBST([GUIX_TEST_ROOT]) GUIX_CHECK_LOCALSTATEDIR + + case "x$host_os" in + x*linux*) + AC_CHECK_PROG([have_slirp4netns], [slirp4netns], [yes]) + if test "x$have_slirp4netns" != "xyes" + then + AC_MSG_WARN([Slirp4netns not found; fixed-output chroot builds won't work without it.]) + fi + ;; + esac + AC_PATH_PROG([SLIRP4NETNS], [slirp4netns], [slirp4netns]) + AC_DEFINE_UNQUOTED([SLIRP4NETNS], ["$SLIRP4NETNS"], + [Path to the slirp4netns program, if any.]) fi AM_CONDITIONAL([HAVE_LIBBZ2], [test "x$HAVE_LIBBZ2" = "xyes"]) diff --git a/doc/guix.texi b/doc/guix.texi index 1b7d3392ab..9d9d2aa083 100644 --- a/doc/guix.texi +++ b/doc/guix.texi @@ -1702,6 +1702,12 @@ user namespaces is missing (@pxref{Build Environment Setup}). Use at your own risk! @end quotation +@item --isolate-host-loopback +Prevent fixed-output chroot builds from accessing the host loopback. +When this is used, fixed-output builds will have their own loopback +interface, and attempts to connect to 127.0.0.1 or ::1 will instead +attempt to connect to its local loopback interface. + @item --log-compression=@var{type} Compress build logs according to @var{type}, one of @code{gzip}, @code{bzip2}, or @code{none}. diff --git a/gnu/packages/package-management.scm b/gnu/packages/package-management.scm index 1421464681..f2b7f40702 100644 --- a/gnu/packages/package-management.scm +++ b/gnu/packages/package-management.scm @@ -59,6 +59,7 @@ #:use-module (gnu packages build-tools) #:use-module (gnu packages check) #:use-module (gnu packages compression) + #:use-module (gnu packages containers) #:use-module (gnu packages cmake) #:use-module (gnu packages cpio) #:use-module (gnu packages cpp) @@ -513,6 +514,9 @@ $(prefix)/etc/openrc\n"))) ;; Some of the tests use "unshare" when it is available. 
("util-linux" ,util-linux) + ,@(if (target-linux?) + `(("slirp4netns" ,slirp4netns)) + '()) ;; Many tests rely on the 'guile-bootstrap' package, which is why we ;; have it here. diff --git a/nix/libstore/build.cc b/nix/libstore/build.cc index 51f5aed106..1a688f3b56 100644 --- a/nix/libstore/build.cc +++ b/nix/libstore/build.cc @@ -14,6 +14,7 @@ #include <map> #include <sstream> #include <algorithm> +#include <regex> #include <limits.h> #include <time.h> @@ -73,10 +74,18 @@ #endif #if CHROOT_ENABLED -#include <sys/socket.h> #include <sys/ioctl.h> #include <net/if.h> -#include <netinet/ip.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <net/route.h> +#include <arpa/inet.h> +#if __linux__ +#include <linux/if_tun.h> +/* This header isn't documented in 'man netdevice', but there doesn't seem to + be any other way to get 'struct in6_ifreq'... */ +#include <linux/ipv6.h> +#endif #endif #if __linux__ @@ -661,6 +670,10 @@ private: /* Whether this is a fixed-output derivation. */ bool fixedOutput; + /* PID of the 'slirp4netns' process in case of a fixed-output + derivation. */ + Pid slirp; + typedef void (DerivationGoal::*GoalState)(); GoalState state; @@ -831,6 +844,10 @@ void DerivationGoal::killChild() worker.childTerminated(hook->pid); } hook.reset(); + + if (slirp != -1) + /* Terminate the 'slirp4netns' process. */ + slirp.kill(); } @@ -1611,7 +1628,9 @@ static const gid_t guestGID = 30000; /* Initialize the user namespace of CHILD. 
*/ static void initializeUserNamespace(pid_t child, uid_t hostUID = getuid(), - gid_t hostGID = getgid()) + gid_t hostGID = getgid(), + uid_t guestUID = guestUID, + gid_t guestGID = guestGID) { writeFile("/proc/" + std::to_string(child) + "/uid_map", (format("%d %d 1") % guestUID % hostUID).str()); @@ -1624,12 +1643,427 @@ static void initializeUserNamespace(pid_t child, #if CHROOT_ENABLED -void clearRootWritePermsAction(SpawnContext & sctx) +/* Creating TAP device for the fixed-output derivation build environment, + based on how slirp4netns does it. send_fd_socket is a unix-domain socket + that a file descriptor for the TAP device will be sent on along with a + single null byte of regular data. */ +static void setupTap(int send_fd_socket, bool ipv6Enabled) +{ + AutoCloseFD tapfd; + struct ifreq ifr; + struct in6_ifreq ifr6; + char tapname[] = "tap0"; + int ifindex; + + tapfd = open("/dev/net/tun", O_RDWR); + if(tapfd < 0) + throw SysError("opening `/dev/net/tun'"); + + memset(&ifr, 0, sizeof(ifr)); + ifr.ifr_flags = IFF_TAP | IFF_NO_PI; + strncpy(ifr.ifr_name, tapname, sizeof(ifr.ifr_name) - 1); + if(ioctl(tapfd, TUNSETIFF, (void*)&ifr) < 0) + throw SysError("TUNSETIFF"); + + /* DAD is "duplicate address detection". By default the kernel will put + any ipv6 addresses that we add into the "tentative" state, and only + after several seconds have been spent trying to chat with network + neighbors about whether anyone is already using the address will it + allow it to be bound to, whether for listening or for connecting. + + This causes tcp connections initiated before then to bind to ::1, which + obviously is not a valid address for communication between hosts. Even + after the real addresses leave the "tentative" state, the source address + used for the already-started connection attempt does not change. 
+ + In our situation we know for a fact nobody else is using the addresses + we give, so there's no point in waiting the extra several seconds to + perform DAD; disable it entirely instead. + + Note: this needs to use conf/tap0/ instead of conf/all/ */ + writeFile("/proc/sys/net/ipv6/conf/tap0/accept_dad", "0"); + + /* By default tap0 will solicit and receive router advertisements, and + * thereby obtain an ipv6 address from slirp4netns. But if the host + * doesn't have a working ipv6 connection, this could mess things up for + * guest programs (and really the guest network stack itself), as they + * have no way of knowing that, and will therefore likely try connecting + * to addresses found in AAAA records, which will fail. To prevent this, + * ignore router advertisements. */ + writeFile("/proc/sys/net/ipv6/conf/tap0/accept_ra", "0"); + + /* Now set up: + 1. tap0's active flags (so it's running, up, etc) + 2. tap0's MTU + 3. tap0's ip address + 4. tap0's network mask + 5. A default route to tap0 */ + AutoCloseFD sockfd = socket(AF_INET, SOCK_DGRAM, 0); + + if(sockfd < 0) + throw SysError("creating socket"); + + AutoCloseFD sockfd6 = socket(AF_INET6, SOCK_DGRAM, 0); + + if(sockfd6 < 0) + throw SysError("creating ipv6 socket"); + + if(ioctl(sockfd, SIOCGIFINDEX, &ifr) < 0) + throw SysError("getting tap0 ifindex"); + + ifindex = ifr.ifr_ifindex; + + ifr.ifr_flags = IFF_UP | IFF_RUNNING; + if(ioctl(sockfd, SIOCSIFFLAGS, &ifr) < 0) + throw SysError("setting flags for tap0"); + + /* slirp4netns default */ + ifr.ifr_mtu = 1500; + if(ioctl(sockfd, SIOCSIFMTU, &ifr) < 0) + throw SysError("setting MTU for tap0"); + + /* default network CIDR: 10.0.2.0/24, fd00::/64 */ + /* default recommended_vguest: 10.0.2.100, fd00::??? 
(we choose to use + fd00::80 and fe80::80) */ + /* default gateway: 10.0.2.2, fd00::2 */ + struct sockaddr_in *sai = (struct sockaddr_in *) &ifr.ifr_addr; + sai->sin_family = AF_INET; + sai->sin_port = htonl(0); + if(inet_pton(AF_INET, "10.0.2.100", &sai->sin_addr) != 1) + throw Error("inet_pton failed"); + + if(ioctl(sockfd, SIOCSIFADDR, &ifr) < 0) + throw SysError("setting tap0 address"); + + if(ipv6Enabled) { + if(inet_pton(AF_INET6, "fd00::80", &ifr6.ifr6_addr) != 1) + throw Error("inet_pton failed"); + ifr6.ifr6_prefixlen = 64; + ifr6.ifr6_ifindex = ifindex; + + if(ioctl(sockfd6, SIOCSIFADDR, &ifr6) < 0) + throw SysError("setting tap0 ipv6 address"); + } + + /* Always set up the link-local address so that communication with the + * host loopback over ipv6 can be possible. */ + if(inet_pton(AF_INET6, "fe80::80", &ifr6.ifr6_addr) != 1) + throw Error("inet_pton failed"); + ifr6.ifr6_prefixlen = 64; + ifr6.ifr6_ifindex = ifindex; + + if(ioctl(sockfd6, SIOCSIFADDR, &ifr6) < 0) + throw SysError("setting tap0 link-local ipv6 address"); + + if(inet_pton(AF_INET, "255.255.255.0", &sai->sin_addr) != 1) + throw Error("inet_pton failed"); + + if(ioctl(sockfd, SIOCSIFNETMASK, &ifr) < 0) + throw SysError("setting tap0 network mask"); + + /* To my knowledge there is no official documentation of SIOCADDRT and + struct rtentry for Linux aside from the Linux kernel source code as of + the year 2025. This is therefore fully cargo-culted from + slirp4netns. 
*/ + + struct rtentry route; + memset(&route, 0, sizeof(route)); + sai = (struct sockaddr_in *)&route.rt_gateway; + sai->sin_family = AF_INET; + if(inet_pton(AF_INET, "10.0.2.2", &sai->sin_addr) != 1) + throw Error("inet_pton failed"); + sai = (struct sockaddr_in *)&route.rt_dst; + sai->sin_family = AF_INET; + sai->sin_addr.s_addr = htonl(INADDR_ANY); + sai = (struct sockaddr_in *)&route.rt_genmask; + sai->sin_family = AF_INET; + sai->sin_addr.s_addr = htonl(INADDR_ANY); + + route.rt_flags = RTF_UP | RTF_GATEWAY; + route.rt_metric = 0; + route.rt_dev = tapname; + + if(ioctl(sockfd, SIOCADDRT, &route) < 0) + throw SysError("setting tap0 as default route"); + + struct in6_rtmsg route6; + memset(&route6, 0, sizeof(route6)); + if(inet_pton(AF_INET6, "fd00::2", &route6.rtmsg_gateway) != 1) + throw Error("inet_pton failed"); + + if(ipv6Enabled) { + /* Set up a default gateway via slirp4netns */ + route6.rtmsg_dst = IN6ADDR_ANY_INIT; + route6.rtmsg_dst_len = 0; + route6.rtmsg_flags = RTF_UP | RTF_GATEWAY; + } else { + /* Set up a route to slirp4netns, but only for talking to the host + * loopback */ + if(inet_pton(AF_INET6, "fd00::2", &route6.rtmsg_dst) != 1) + throw Error("inet_pton failed"); + route6.rtmsg_dst_len = 128; + route6.rtmsg_flags = RTF_UP; + } + route6.rtmsg_src = IN6ADDR_ANY_INIT; + route6.rtmsg_src_len = 0; + route6.rtmsg_ifindex = ifindex; + route6.rtmsg_metric = 1; + + if(ioctl(sockfd6, SIOCADDRT, &route6) < 0) + throw SysError("setting tap0 as default ipv6 route"); + + sendFD(send_fd_socket, tapfd); +} + +struct ChrootBuildSpawnContext : CloneSpawnContext { + bool ipv6Enabled = false; +}; + +static void setupTapAction(SpawnContext & sctx) +{ + ChrootBuildSpawnContext & ctx = (ChrootBuildSpawnContext &) sctx; + setupTap(ctx.setupFD, ctx.ipv6Enabled); +} + + +static void waitForSlirpReadyAction(SpawnContext & sctx) +{ + CloneSpawnContext & ctx = (CloneSpawnContext &) sctx; + /* Wait for the parent process to get slirp4netns running */ + 
waitForMessage(ctx.setupFD, "1"); +} + + +static void enableRouteLocalnetAction(SpawnContext & sctx) +{ + /* Don't treat as invalid packets received with loopback source addresses. + This allows for packets to be received from the host loopback using its + real address, so for example proxy settings referencing 127.0.0.1 will + work both for builtin and regular fixed-output derivations. */ + + /* Note: this file is treated relative to the network namespace of the + process that opens it. We aren't modifying any host settings here, + provided we are in a new network namespace. */ + Path route_localnet4 = "/proc/sys/net/ipv4/conf/all/route_localnet"; + /* XXX: no such toggle exists for ipv6 */ + if(pathExists(route_localnet4)) + writeFile(route_localnet4, "1"); +} + + +static void prepareSlirpChrootAction(SpawnContext & sctx) +{ + CloneSpawnContext & ctx = (CloneSpawnContext &) sctx; + auto mounts = tokenizeString<Strings>(readFile("/proc/self/mountinfo", true), "\n"); + set<string> seen; + for(auto & i : mounts) { + auto fields = tokenizeString<vector<string> >(i, " "); + auto fs = decodeOctalEscaped(fields.at(4)); + if(seen.find(fs) == seen.end()) { + /* slirp4netns only does a single umount of the old root ("/old") + after pivot_root. Because of this, if there are multiple + mounts stacked on top of each other, only the topmost one (the + read-only bind mount) will be unmounted, leaving the real root + in place and causing the subsequent rmdir to fail. The best we + can do is to make everything immediately underneath "/" be + read-only, which we do after mounting every non-/ filesystem + read-only. 
*/ + if(fs == "/") continue; + /* Don't mount /etc or any of its subdirectories, we're only interested + in mounting network stuff from it */ + if(fs.compare(0, 4, "/etc") == 0) continue; + /* We want /run to be empty */ + if(fs.compare(0, 4, "/run") == 0) continue; + /* Don't mount anything from under our chroot directory */ + if(fs.compare(0, ctx.chrootRootDir.length(), ctx.chrootRootDir) == 0) continue; + struct stat st; + if(stat(fs.c_str(), &st) != 0) { + if(errno == EACCES) continue; /* Not accessible anyway */ + else throw SysError(format("stat of `%1%'") % fs); + } + + ctx.readOnlyFilesInChroot.insert(fs); + ctx.filesInChroot[fs] = fs; + seen.insert(fs); + } + } + + /* Limit /etc to containing just /etc/resolv.conf and /etc/hosts, and + read-only at that */ + Strings etcFiles = { "/etc/resolv.conf", "/etc/hosts" }; + for(auto & i : etcFiles) { + if(pathExists(i)) { + ctx.filesInChroot[i] = i; + ctx.readOnlyFilesInChroot.insert(i); + } + } + + /* Make everything immediately under "/" read-only, since we can't make / + itself read-only. */ + DirEntries dirs = readDirectory("/"); + for (auto & i : dirs) { + string fs = "/" + i.name; + if(fs == "/etc") continue; + if(fs == "/run") continue; + ctx.filesInChroot[fs] = fs; + ctx.readOnlyFilesInChroot.insert(fs); + } + + if(mkdir((ctx.chrootRootDir + "/run").c_str(), 0700) == -1) + throw SysError("mkdir /run in chroot"); +} + + +static void remapIdsTo0Action(SpawnContext & sctx) +{ + CloneSpawnContext & ctx = (CloneSpawnContext &) sctx; + string uid = std::to_string(ctx.setuid ? ctx.user : getuid()); + string gid = std::to_string(ctx.setgid ? ctx.group : getgid()); + + /* If uid != getuid(), then the process that writes to uid_map needs + * capabilities in the parent user namespace. Fork a child to stay in + * the parent namespace and do the write for us. 
*/ + unshareAndInitUserns(CLONE_NEWUSER, + "0 " + uid + " 1", + "0 " + gid + " 1", + ctx.lockMountsAllowSetgroups); + + ctx.user = 0; + ctx.group = 0; +} + + +/* Spawn 'slirp4netns' in separate namespaces as the given user and group; + 'tapfd' must correspond to a /dev/net/tun connection. Configure it to + write to 'notifyReadyFD' once it's up and running. */ +static pid_t spawnSlirp4netns(int tapfd, int notifyReadyFD, + uid_t slirpUser, gid_t slirpGroup) +{ + Pipe slirpSetupPipe; + CloneSpawnContext slirpCtx; + AutoCloseFD devNullFd; + bool amRoot = geteuid() == 0; + bool newUserNS = !amRoot; + slirpCtx.phases = getCloneSpawnPhases(); + slirpCtx.cloneFlags = + /* slirp4netns will handle the chroot and pivot_root on its own, but + we should ensure that whatever filesystem holds the slirp4netns + executable is read-only, since otherwise it might be possible for a + compromised slirp4netns to overwrite itself using /proc/self/exe, + depending on who owns what. */ + CLONE_NEWNS | + /* ptrace disregards user namespaces when the would-be tracing process + and the would-be traced process have the same real, effective, and + saved user ids. The only way to protect them is to make it + impossible to reference them. */ + CLONE_NEWPID | + /* need this when we're not running as root so that we have the + * capabilities to create the other namespaces. */ + (newUserNS ? CLONE_NEWUSER : 0) | + /* For good measure */ + CLONE_NEWIPC | + CLONE_NEWUTS | + /* Of course, a new network namespace would defeat the + purpose. 
*/ + SIGCHLD; + slirpCtx.program = settings.slirp4netns; + slirpCtx.args = + { "slirp4netns", "--netns-type=tapfd", + "--enable-sandbox", + "--enable-ipv6", + "--ready-fd=" + std::to_string(notifyReadyFD) }; + if(!settings.useHostLoopback) + slirpCtx.args.push_back("--disable-host-loopback"); + slirpCtx.args.push_back(std::to_string(tapfd)); + slirpCtx.inheritEnv = true; + if(newUserNS) { + slirpSetupPipe.create(); + slirpCtx.setupFD = slirpSetupPipe.readSide; + slirpCtx.earlyCloseFDs.insert(slirpSetupPipe.writeSide); + } + slirpCtx.closeMostFDs = true; + slirpCtx.preserveFDs.insert(notifyReadyFD); + slirpCtx.preserveFDs.insert(tapfd); + slirpCtx.setStdin = true; + slirpCtx.stdinFile = "/dev/null"; + slirpCtx.setsid = true; + slirpCtx.dropAmbientCapabilities = true; + slirpCtx.doChroot = true; + slirpCtx.mountTmpfsOnChroot = true; + slirpCtx.chrootRootDir = getEnv("TMPDIR", "/tmp"); + slirpCtx.lockMounts = true; + slirpCtx.lockMountsMapAll = true; /* So that later setuid will work */ + slirpCtx.lockMountsAllowSetgroups = amRoot; + slirpCtx.mountProc = true; + slirpCtx.setuid = true; + slirpCtx.user = slirpUser; + slirpCtx.setgid = true; + slirpCtx.group = slirpGroup; + /* Dropping supplementary groups requires capabilities in current user + * namespace */ + if(amRoot) { + slirpCtx.supplementaryGroups = {}; + slirpCtx.setSupplementaryGroups = true; + } + slirpCtx.seccompFilter = slirpSeccompFilter(); + slirpCtx.addSeccompFilter = true; + + /* Silence slirp4netns output unless requested */ + if(verbosity <= lvlInfo) { + devNullFd = open("/dev/null", O_WRONLY); + if(devNullFd == -1) + throw SysError("cannot open `/dev/null'"); + slirpCtx.logFD = devNullFd; + } + + addPhaseAfter(slirpCtx.phases, + "makeChrootSeparateFilesystem", + "prepareSlirpChroot", + prepareSlirpChrootAction); + + /* slirp behaves differently when uid != 0 */ + addPhaseAfter(slirpCtx.phases, + "lockMounts", + "remapIdsTo0", + remapIdsTo0Action); + +#if 0 /* For debugging networking issues */ + 
slirpCtx.env["SLIRP_DEBUG"] = "call,misc,error,tftp,verbose_call"; + slirpCtx.env["G_MESSAGES_DEBUG"] = "all"; +#endif + + pid_t slirpPid = cloneChild(slirpCtx); + + if(newUserNS) { + slirpSetupPipe.readSide.close(); + initializeUserNamespace(slirpPid, getuid(), getgid(), getuid(), getgid()); + writeFull(slirpSetupPipe.writeSide, (unsigned char*)"go\n", 3); + } + return slirpPid; +} + +static void clearRootWritePermsAction(SpawnContext & sctx) { if(chmod("/", 0555) == -1) throw SysError("changing mode of chroot root directory"); } + +/* Note: linux-only */ +bool haveGlobalIPv6Address() +{ + if(!pathExists("/proc/net/if_inet6")) return false; + + auto addresses = tokenizeString<Strings>(readFile("/proc/net/if_inet6", true), "\n"); + for(auto & i : addresses) { + auto fields = tokenizeString<vector<string> >(i, " "); + auto scopeHex = fields.at(3); + /* 0x0 means "Global scope" */ + if(scopeHex == "00" || scopeHex == "40") return true; + } + return false; +} + #endif /* CHROOT_ENABLED */ /* Return true if the operating system kernel part of SYSTEM1 and SYSTEM2 (the @@ -1731,10 +2165,10 @@ void DerivationGoal::startBuilder() f.exceptions(boost::io::all_error_bits ^ boost::io::too_many_args_bit); startNest(nest, lvlInfo, f % showPaths(missingPaths) % curRound % nrRounds); - /* A CloneSpawnContext reference can be passed to procedures expecting a - SpawnContext reference */ + /* A ChrootBuildSpawnContext reference can be passed to procedures + expecting a SpawnContext reference */ #if CHROOT_ENABLED - CloneSpawnContext ctx; + ChrootBuildSpawnContext ctx; #else SpawnContext ctx; #endif @@ -1945,6 +2379,10 @@ void DerivationGoal::startBuilder() ctx.supplementaryGroups = buildUser.getSupplementaryGIDs(); } +#if CHROOT_ENABLED + bool useSlirp4netns = false; +#endif + if (useChroot) { #if CHROOT_ENABLED ctx.phases = getCloneSpawnPhases(); @@ -1960,14 +2398,26 @@ void DerivationGoal::startBuilder() /* Clean up the chroot directory automatically. 
*/ autoDelChroot = std::shared_ptr<AutoDelete>(new AutoDelete(chrootRootTop)); + if(fixedOutput) { + if(findProgram(settings.slirp4netns) == "") + printMsg(lvlError, format("`%1%' can't be found in PATH, network access disabled") % settings.slirp4netns); + else { + if(!pathExists("/dev/net/tun")) + printMsg(lvlError, "`/dev/net/tun' is missing, network access disabled"); + else { + useSlirp4netns = true; + ctx.ipv6Enabled = haveGlobalIPv6Address(); + } + } + } + ctx.doChroot = true; ctx.chrootRootDir = chrootRootDir; - ctx.cloneFlags = CLONE_NEWNS | CLONE_NEWPID | CLONE_NEWIPC | CLONE_NEWUTS | SIGCHLD; + ctx.cloneFlags = CLONE_NEWNS | CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWIPC | CLONE_NEWUTS | SIGCHLD; - if(!fixedOutput) { + if(!fixedOutput || /* redundant but shows the cases clearly */ + (fixedOutput && !settings.useHostLoopback)) ctx.initLoopback = true; - ctx.cloneFlags |= CLONE_NEWNET; - } if(!buildUser.enabled()) ctx.cloneFlags |= CLONE_NEWUSER; @@ -2014,18 +2464,42 @@ void DerivationGoal::startBuilder() if (fixedOutput) { /* Fixed-output derivations typically need to access the network, so give them access to /etc/resolv.conf and so on. 
*/ - auto files = { "/etc/resolv.conf", "/etc/nsswitch.conf", - "/etc/services", "/etc/hosts" }; - for (auto & file: files) { + std::vector<Path> files = { "/etc/services", "/etc/nsswitch.conf" }; + if (useSlirp4netns) { + if (settings.useHostLoopback) { + string hosts; + if(pathExists("/etc/hosts")) { + hosts = readFile("/etc/hosts"); + hosts = std::regex_replace(hosts, std::regex("127\\.0\\.0\\.1"), "10.0.2.2"); + hosts = std::regex_replace(hosts, std::regex("::1"), "fd00::2"); + } else { + hosts = + "10.0.2.2 localhost\n" + "fd00::2 localhost\n"; + } + writeFile(chrootRootDir + "/etc/hosts", hosts); + } + else { + files.push_back("/etc/hosts"); + } + writeFile(chrootRootDir + "/etc/resolv.conf", "nameserver 10.0.2.3"); + } + else { + files.push_back("/etc/hosts"); + files.push_back("/etc/resolv.conf"); + } + for (auto & file : files) { if (pathExists(file)) { ctx.filesInChroot[file] = file; ctx.readOnlyFilesInChroot.insert(file); } } - } else { - /* Create /etc/hosts with localhost entry. */ - writeFile(chrootRootDir + "/etc/hosts", "127.0.0.1 localhost\n"); } + else + /* Create /etc/hosts with localhost entry. */ + writeFile(chrootRootDir + "/etc/hosts", + "127.0.0.1 localhost\n" + "::1 localhost\n"); /* Bind-mount a user-configurable set of directories from the host file system. */ @@ -2175,7 +2649,9 @@ void DerivationGoal::startBuilder() - The private network namespace ensures that the builder cannot talk to the outside world (or vice versa). It only has a - private loopback interface. + private loopback interface. As an exception, fixed-output + derivations may talk to the outside world through slirp4netns, but + still in a separate network namespace. 
- The IPC namespace prevents the builder from communicating with outside processes using SysV IPC mechanisms (shared @@ -2191,7 +2667,7 @@ void DerivationGoal::startBuilder() AutoCloseFD parentSetupSocket; AutoCloseFD childSetupSocket; - if(((ctx.cloneFlags & CLONE_NEWUSER) != 0)) { + if(((ctx.cloneFlags & CLONE_NEWUSER) != 0) || useSlirp4netns) { if (socketpair(AF_LOCAL, SOCK_STREAM, 0, fds)) throw SysError("creating setup socket"); parentSetupSocket = fds[0]; @@ -2202,6 +2678,15 @@ void DerivationGoal::startBuilder() ctx.setupFD = childSetupSocket; } + if(useSlirp4netns) { + addPhaseAfter(ctx.phases, "initLoopback", "setupTap", setupTapAction); + addPhaseAfter(ctx.phases, "setupTap", "waitForSlirpReady", + waitForSlirpReadyAction); + if(settings.useHostLoopback) + addPhaseAfter(ctx.phases, "waitForSlirpReady", "enableRouteLocalnet", + enableRouteLocalnetAction); + } + pid = cloneChild(ctx); if(childSetupSocket >= 0) childSetupSocket.close(); @@ -2211,6 +2696,34 @@ void DerivationGoal::startBuilder() initializeUserNamespace(pid); writeFull(parentSetupSocket, (unsigned char*)"go\n", 3); } + + try { + if(useSlirp4netns) { + AutoCloseFD tapfd = receiveFD(parentSetupSocket); + /* Start 'slirp4netns' to provide networking in the child process; + running the builder in the global network namespace would give + it access to the global namespace of abstract sockets, which + could be used to grant write access to the store to an external + process. */ + slirp = spawnSlirp4netns( + tapfd, + parentSetupSocket, + /* Do whatever we can to run slirp4netns as some user + other than root - run it as the build user if + necessary */ + buildUser.enabled() ? buildUser.getUID() : getuid(), + buildUser.enabled() ? 
buildUser.getGID() : getgid()); + } + } catch(std::exception & e) { + if(slirp != -1) { + slirp.kill(true); + } + if(pid != -1) { + pid.kill(true); + } + throw e; + } + } else #endif { diff --git a/nix/libstore/globals.cc b/nix/libstore/globals.cc index 10c60f6106..31da8d4769 100644 --- a/nix/libstore/globals.cc +++ b/nix/libstore/globals.cc @@ -56,6 +56,8 @@ Settings::Settings() envKeepDerivations = false; lockCPU = getEnv("NIX_AFFINITY_HACK", "1") == "1"; showTrace = false; + useHostLoopback = true; + slirp4netns = SLIRP4NETNS; } diff --git a/nix/libstore/globals.hh b/nix/libstore/globals.hh index 27616a2283..7cfa06e76c 100644 --- a/nix/libstore/globals.hh +++ b/nix/libstore/globals.hh @@ -206,6 +206,15 @@ struct Settings { /* Whether to show a stack trace if Nix evaluation fails. */ bool showTrace; + /* Whether fixed-output chroot builds should be able to use the host + loopback, for example to access a socks proxy. Note that while using + "localhost" and 127.0.0.1 to access the host loopback will work, using + ::1 will not, due to a limitation in Linux. */ + bool useHostLoopback; + + /* The filename to use for executing slirp4netns when it is needed. */ + Path slirp4netns; + private: SettingsMap settings, overrides; diff --git a/nix/libutil/util.cc b/nix/libutil/util.cc index e71e6c170a..327edf471f 100644 --- a/nix/libutil/util.cc +++ b/nix/libutil/util.cc @@ -14,6 +14,7 @@ #include <unistd.h> #include <fcntl.h> #include <limits.h> +#include <sys/socket.h> #ifdef __APPLE__ #include <sys/syscall.h> @@ -62,6 +63,27 @@ string getEnv(const string & key, const string & def) } +string findProgram(const string & program) +{ + if(program.empty()) return ""; + + if(program[0] == '/') return pathExists(program) ? 
program : ""; + + char *path_ = getenv("PATH"); + if(path_ == NULL) return ""; + string path = path_; + + Strings dirs = tokenizeString<Strings>(path, ":"); + for (const auto& i : dirs) { + if(i == "") continue; + string f = i + "/" + program; + if(pathExists(f)) return f; + } + + return ""; +} + + Path absPath(Path path, Path dir) { if (path[0] != '/') { @@ -857,6 +879,67 @@ void Pipe::create() } +void sendFD(int sock, int fd) +{ + ssize_t rc; + struct msghdr msg; + struct cmsghdr *cmsg; + char cmsgbuf[CMSG_SPACE(sizeof(fd))]; + struct iovec iov; + char dummy = '\0'; + memset(&msg, 0, sizeof(msg)); + iov.iov_base = &dummy; + iov.iov_len = 1; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(fd)); + memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd)); + msg.msg_controllen = cmsg->cmsg_len; + do + { + rc = sendmsg(sock, &msg, 0); + } while(rc < 0 && errno == EINTR); + if(rc < 0) + throw SysError("sending fd"); +} + + +int receiveFD(int sock) +{ + int fd; + ssize_t rc; + struct msghdr msg; + struct cmsghdr *cmsg; + char cmsgbuf[CMSG_SPACE(sizeof(fd))]; + struct iovec iov; + char dummy = '\0'; + memset(&msg, 0, sizeof(msg)); + iov.iov_base = &dummy; + iov.iov_len = 1; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + do + { + rc = recvmsg(sock, &msg, 0); + } while(rc < 0 && errno == EINTR); + if (rc < 0) + throw SysError("receiving fd"); + if (rc == 0) + throw Error("received EOF (empty message) while receiving fd"); + + cmsg = CMSG_FIRSTHDR(&msg); + if (cmsg == NULL || cmsg->cmsg_type != SCM_RIGHTS) + throw Error("received message without an fd"); + memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd)); + return fd; +} ////////////////////////////////////////////////////////////////////// @@ -1301,6 +1384,24 @@ bool 
endOfList(std::istream & str) return false; } +string decodeOctalEscaped(const string & s) +{ + string r; + for (string::const_iterator i = s.begin(); i != s.end(); ) { + if (*i != '\\') { r += *(i++); continue; } + unsigned char c = 0; + ++i; + for(int j = 0; j < 3; j++) { + if(i == s.end() || *i < '0' || *i >= '8') + throw Error("malformed octal escape"); + c = c * 8 + (*i - '0'); + ++i; + } + r += c; + } + return r; +} + void ignoreException() { diff --git a/nix/libutil/util.hh b/nix/libutil/util.hh index ab2395e959..648d6f19a4 100644 --- a/nix/libutil/util.hh +++ b/nix/libutil/util.hh @@ -19,6 +19,12 @@ namespace nix { /* Return an environment variable. */ string getEnv(const string & key, const string & def = ""); +/* Find the absolute filename corresponding to PROGRAM, searching PATH if + PROGRAM is a relative filename. If PROGRAM is an absolute filename for a + file that doesn't exist, or it can't be found in PATH, then return the + empty string. */ +string findProgram(const string & program); + /* Return an absolutized path, resolving paths relative to the specified directory, or the current directory otherwise. The path is also canonicalised. */ @@ -207,6 +213,10 @@ public: int borrow(); }; +/* Send and receive an FD on a unix-domain socket, along with a single null + byte of regular data. */ +void sendFD(int sock, int fd); +int receiveFD(int sock); class Pipe { @@ -370,6 +380,12 @@ string parseString(std::istream & str); bool endOfList(std::istream & str); +/* Escape a string that contains octal-encoded escape codes such as + used in /etc/fstab and /proc/mounts (e.g. "foo\040bar" decodes to + "foo bar"). */ +string decodeOctalEscaped(const string & s); + + /* Exception handling in destructors: print an error message, then ignore the exception. 
*/ void ignoreException(); diff --git a/nix/nix-daemon/guix-daemon.cc b/nix/nix-daemon/guix-daemon.cc index d7ab9c5e64..30727d5559 100644 --- a/nix/nix-daemon/guix-daemon.cc +++ b/nix/nix-daemon/guix-daemon.cc @@ -90,6 +90,7 @@ builds derivations on behalf of its clients."); #define GUIX_OPT_MAX_SILENT_TIME 19 #define GUIX_OPT_LOG_COMPRESSION 20 #define GUIX_OPT_DISCOVER 21 +#define GUIX_OPT_ISOLATE_HOST_LOOPBACK 22 static const struct argp_option options[] = { @@ -160,6 +161,8 @@ to live outputs") }, n_("listen for connections on SOCKET") }, { "debug", GUIX_OPT_DEBUG, 0, 0, n_("produce debugging output") }, + { "isolate-host-loopback", GUIX_OPT_ISOLATE_HOST_LOOPBACK, 0, 0, + n_("do not allow fixed-output chroot builds to access the host loopback") }, { 0, 0, 0, 0, 0 } }; @@ -294,6 +297,9 @@ parse_opt (int key, char *arg, struct argp_state *state) case GUIX_OPT_SYSTEM: settings.thisSystem = arg; break; + case GUIX_OPT_ISOLATE_HOST_LOOPBACK: + settings.useHostLoopback = false; + break; default: return (error_t) ARGP_ERR_UNKNOWN; } diff --git a/tests/derivations.scm b/tests/derivations.scm index 996e5ac504..077aee0909 100644 --- a/tests/derivations.scm +++ b/tests/derivations.scm @@ -25,12 +25,15 @@ #:use-module ((gcrypt hash) #:prefix gcrypt:) #:use-module (guix base32) #:use-module ((guix git) #:select (with-repository)) + #:use-module (guix config) #:use-module (guix tests) #:use-module (guix tests git) #:use-module (guix tests http) #:use-module ((guix packages) #:select (package-derivation base32)) #:use-module ((guix build utils) - #:select (executable-file? strip-store-file-name)) + #:select (executable-file? 
strip-store-file-name which)) + #:use-module ((gnu build linux-container) + #:select (unprivileged-user-namespace-supported?)) #:use-module ((guix hash) #:select (file-hash*)) #:use-module ((git oid) #:select (oid->string)) #:use-module ((git reference) #:select (reference-name->oid)) @@ -55,6 +58,14 @@ ;; Globally disable grafts because they can trigger early builds. (%graft? #f) +;; This can happen when someone is running tests without --disable-chroot and +;; with either slirp4netns or /dev/net/tun unavailable. +(define builder-network-isolated? + (and (target-linux? %system) + (unprivileged-user-namespace-supported?) + (or (not (which "slirp4netns")) + (not (file-exists? "/dev/net/tun"))))) + (define (bootstrap-binary name) (let ((bin (search-bootstrap-binary name (%current-system)))) (and %store @@ -502,6 +513,7 @@ #:hash #vu8(1 2 3)) #f)) +(unless (not builder-network-isolated?) (test-skip 1)) (test-assert "fixed-output derivation, network access, localhost" ;; Test a fixed-output derivation connecting to "localhost". (let ((text (random-text))) @@ -534,7 +546,8 @@ get-string-all) text)))))) -(unless (network-reachable?) (test-skip 1)) +(unless (and (network-reachable?) (not builder-network-isolated?)) + (test-skip 1)) (test-assert "fixed-output derivation, network access, external host" ;; Test a fixed-output derivation connecting to an external server. (let* ((drv (build-expression->derivation |