diff options
Diffstat (limited to 'nix/libutil/seccomp.hh')
-rw-r--r-- | nix/libutil/seccomp.hh | 222 |
1 files changed, 222 insertions, 0 deletions
diff --git a/nix/libutil/seccomp.hh b/nix/libutil/seccomp.hh new file mode 100644 index 0000000000..634dfad5f8 --- /dev/null +++ b/nix/libutil/seccomp.hh @@ -0,0 +1,222 @@ +#pragma once + +#include "util.hh" +#include <linux/audit.h> /* For AUDIT_ARCH_* */ +#include <linux/seccomp.h> +#include <linux/filter.h> + + +/* This file provides two preprocessor macros (among other things): + 1. AUDIT_ARCH_NATIVE, which evaluates to whichever of the AUDIT_ARCH_* + values best represents the target system. Linux's internal headers have + a SECCOMP_ARCH_NATIVE since 2020, but it's not user-visible. Detection + of this is based on src/arch.c in libseccomp. + 2. NATIVE_SYSCALL_RANGES, an array initializer for an array of two-element + objects, the first of which is an integral number representing the + start (inclusive) of a range of valid syscall numbers, and the second + of which is an integral number representing the end (inclusive) of that + range of valid syscall numbers. The ranges provided are all + non-overlapping and strictly ascending (that is, the start of a range is + strictly higher than any of the numbers in any of the ranges that + precede it). All numbers involved fit into a long. + + These ranges were generated from the various syscall.tbl, + syscall_32.tbl, and syscall_64.tbl files lying around in the linux + kernel source. Some were derived from + include/uapi/asm-generic/unistd.h. The kernel source used was commit + b3ee1e460951 of https://github.com/torvalds/linux.git, read on + 2025-04-23. Not all of the gaps in the files have any comments pointing + them out, so I recommend using build-aux/extract-syscall-ranges.sh for + the *.tbl files. + + The intent behind saving these ranges is to be able to use a + default-allow seccomp policy that nevertheless disallows future + syscalls. This ensures that our security analysis can work with a + static, well-defined set of system calls that won't grow in the future + unless someone explicitly revisits the system call tables to consider + the implications of the new additions. */ + +/* Both ends are inclusive. Some of the .tbl files use strange entries for + * the "abi" field, check arch/$ARCH/kernel/Makefile.syscalls to see what it + * specifies for syscall_abis_32 and syscall_abis_64 in addition to 32 or 64 + * and "common" (added in Makefile.asm-headers). Also check what, if + * anything, the makefile uses as the --offset flag to syscallhdr.sh. And + * look at arch/$ARCH/include/uapi/asm/unistd.h to see what value the offset + * takes in what configurations. */ + +#ifndef AUDIT_ARCH_NATIVE + +#if __i386__ +#define AUDIT_ARCH_NATIVE AUDIT_ARCH_I386 +#define NATIVE_SYSCALL_RANGES { {0, 221}, {224, 250}, {252, 284}, {286, 386}, \ + {393, 414}, {416, 466} } +#elif __x86_64__ +#define AUDIT_ARCH_NATIVE AUDIT_ARCH_X86_64 +#ifdef __ILP32__ +#include <asm/unistd.h> +#define X32RANGE(low, high) { (low | __X32_SYSCALL_BIT), (high | __X32_SYSCALL_BIT) } +#define NATIVE_SYSCALL_RANGES \ + { X32RANGE(0, 12), X32RANGE(14, 14), X32RANGE(17, 18), X32RANGE(21, 44), X32RANGE(48, 53), \ + X32RANGE(56, 58), X32RANGE(60, 100), X32RANGE(102, 126), X32RANGE(130, 130), \ + X32RANGE(132, 133), X32RANGE(135, 155), X32RANGE(157, 173), X32RANGE(175, 176), \ + X32RANGE(179, 179), X32RANGE(181, 204), X32RANGE(207, 208), X32RANGE(210, 210), \ + X32RANGE(212, 213), X32RANGE(216, 221), X32RANGE(223, 235), X32RANGE(237, 243), \ + X32RANGE(245, 245), X32RANGE(248, 272), X32RANGE(275, 277), X32RANGE(280, 294), \ + X32RANGE(298, 298), X32RANGE(300, 306), X32RANGE(308, 309), X32RANGE(312, 321), \ + X32RANGE(323, 326), X32RANGE(329, 335), X32RANGE(424, 466), X32RANGE(512, 547) } +#else +#define NATIVE_SYSCALL_RANGES { {0, 335}, {424, 466} } +#endif +#elif __arm__ +#define AUDIT_ARCH_NATIVE AUDIT_ARCH_ARM +/* Note: there are at present 6 extra ARM syscall numbers not listed in + arch/arm/tools/syscall.tbl, namely __ARM_NR_breakpoint through + __ARM_NR_get_tls. */ +#ifdef __ARM_EABI__ +#include <asm/unistd.h> +#define NATIVE_SYSCALL_RANGES \ + { {0, 6}, {8, 12}, {14, 16}, {19, 21}, {23, 24}, {26, 26}, {29, 29}, \ + {33, 34}, {36, 43}, {45, 47}, {49, 52}, {54, 55}, {57, 57}, {60, 67}, \ + {70, 75}, {77, 81}, {83, 83}, {85, 88}, {91, 97}, {99, 100}, {103, 108}, \ + {111, 111}, {114, 116}, {118, 122}, {124, 126}, {128, 129}, {131, 136}, \ + {138, 165}, {168, 187}, {190, 221}, {224, 253}, {256, 401}, {403, 414}, \ + {416, 446}, {448, 466}, {(__ARM_NR_BASE + 1), (__ARM_NR_BASE + 6)} } +#else +#include <asm/unistd.h> +#define OABIRANGE(low, high) { (low | __NR_OABI_SYSCALL_BASE), (high | __NR_OABI_SYSCALL_BASE) } +#define NATIVE_SYSCALL_RANGES \ + { OABIRANGE(0, 6), OABIRANGE(8, 16), OABIRANGE(19, 27), OABIRANGE(29, 30), \ + OABIRANGE(33, 34), OABIRANGE(36, 43), OABIRANGE(45, 47), OABIRANGE(49, 52), \ + OABIRANGE(54, 55), OABIRANGE(57, 57), OABIRANGE(60, 67), OABIRANGE(70, 83), \ + OABIRANGE(85, 97), OABIRANGE(99, 100), OABIRANGE(102, 108), \ + OABIRANGE(111, 111), OABIRANGE(113, 122), OABIRANGE(124, 126), \ + OABIRANGE(128, 129), OABIRANGE(131, 136), OABIRANGE(138, 165), \ + OABIRANGE(168, 187), OABIRANGE(190, 221), OABIRANGE(224, 253), \ + OABIRANGE(256, 401), OABIRANGE(403, 414), OABIRANGE(416, 446), \ + OABIRANGE(448, 466), {(__ARM_NR_BASE + 1), (__ARM_NR_BASE + 6)} } +#endif +#elif __aarch64__ +#define AUDIT_ARCH_NATIVE AUDIT_ARCH_AARCH64 +/* extract-syscall-ranges.sh $LINUXSOURCE/arch/arm64/tools/syscall_64.tbl \ + '64|renameat|rlimit|memfd_secret' + + the extra ABIs are taken from arch/arm64/kernel/Makefile.syscalls and + scripts/Makefile.asm-headers */ +#define NATIVE_SYSCALL_RANGES { {0, 243}, {260, 294}, {424, 466} } +/* To my knowledge there is no x32 equivalent for aarch64 in mainline linux */ +#elif __mips__ && _MIPS_SIM == _MIPS_SIM_ABI32 +/* o32 abi in both endianness cases */ +#include <asm/unistd.h> +#define SYSRANGE(low, high) {(low) + __NR_Linux, (high) + __NR_Linux} +#define NATIVE_SYSCALL_RANGES \ + { SYSRANGE(0, 278), SYSRANGE(280, 368), SYSRANGE(393, 414), \ + SYSRANGE(416, 446), SYSRANGE(448, 466) } +#if __MIPSEB__ +#define AUDIT_ARCH_NATIVE AUDIT_ARCH_MIPS; +#elif __MIPSEL__ +#define AUDIT_ARCH_NATIVE AUDIT_ARCH_MIPSEL +#endif +#elif __mips__ && _MIPS_SIM == _MIPS_SIM_ABI64 +/* n64 abi in both endianness cases */ +#include <asm/unistd.h> +#define SYSRANGE(low, high) {(low) + __NR_Linux, (high) + __NR_Linux} +#define NATIVE_SYSCALL_RANGES \ + { SYSRANGE(0, 237), SYSRANGE(239, 328), SYSRANGE(424, 446), SYSRANGE(448, 466) } +#if __MIPSEB__ +#define AUDIT_ARCH_NATIVE AUDIT_ARCH_MIPS64 +#elif __MIPSEL__ +#define AUDIT_ARCH_NATIVE AUDIT_ARCH_MIPSEL64 +#endif /* _MIPS_SIM_ABI64 */ +#elif __mips__ && _MIPS_SIM == _MIPS_SIM_NABI32 +/* n32 abi in both endianness cases */ +#include <asm/unistd.h> +#define SYSRANGE(low, high) {(low) + __NR_Linux, (high) + __NR_Linux} +#define NATIVE_SYSCALL_RANGES \ + { SYSRANGE(0, 241), SYSRANGE(243, 332), SYSRANGE(403, 414), \ + SYSRANGE(416, 446), SYSRANGE(448, 466) } +#if __MIPSEB__ +#define AUDIT_ARCH_NATIVE AUDIT_ARCH_MIPS64N32 +#elif __MIPSEL__ +#define AUDIT_ARCH_NATIVE AUDIT_ARCH_MIPSEL64N32 +#endif /* _MIPS_SIM_NABI32 */ +#elif __hppa64__ /* hppa64 must be checked before hppa */ +#define NATIVE_SYSCALL_RANGES \ + { {0, 101}, {103, 126}, {128, 129}, {131, 136}, {138, 166}, \ + {168, 168}, {170, 195}, {198, 202}, {206, 212}, {215, 219}, \ + {222, 262}, {264, 302}, {304, 356}, {424, 446}, {448, 466} } +#define AUDIT_ARCH_NATIVE AUDIT_ARCH_PARISC64 +#elif __hppa__ +#define NATIVE_SYSCALL_RANGES \ + { {0, 101}, {103, 126}, {128, 129}, {131, 136}, {138, 166}, \ + {168, 168}, {170, 195}, {198, 202}, {206, 212}, {215, 219}, \ + {222, 262}, {264, 302}, {304, 356}, {403, 414}, {416, 446}, {448, 466} } +#define AUDIT_ARCH_NATIVE AUDIT_ARCH_PARISC +#elif __PPC64__ +#define NATIVE_SYSCALL_RANGES \ + { {0, 191}, {198, 203}, {205, 223}, {225, 225}, {227, 253}, \ + {255, 256}, {258, 365}, {378, 388}, {392, 402}, {424, 446}, {448, 466} } +#ifdef __BIG_ENDIAN__ +#define AUDIT_ARCH_NATIVE AUDIT_ARCH_PPC64 +#else +#define AUDIT_ARCH_NATIVE AUDIT_ARCH_PPC64LE +#endif +#elif __PPC__ +#define NATIVE_SYSCALL_RANGES \ + { {0, 223}, {225, 256}, {258, 365}, {378, 388}, {393, 414}, \ + {416, 446}, {448, 466} } +#define AUDIT_ARCH_NATIVE AUDIT_ARCH_PPC +#elif __s390x__ /* s390x must be checked before s390 */ +#define NATIVE_SYSCALL_RANGES \ + { {1, 12}, {14, 15}, {19, 22}, {26, 27}, {29, 30}, {33, 34}, \ + {36, 43}, {45, 45}, {48, 48}, {51, 52}, {54, 55}, {57, 57}, \ + {60, 67}, {72, 75}, {77, 79}, {83, 83}, {85, 94}, {96, 97}, \ + {99, 100}, {102, 108}, {110, 112}, {114, 122}, {124, 137}, \ + {141, 163}, {167, 169}, {172, 181}, {183, 191}, {198, 220}, \ + {222, 222}, {224, 241}, {243, 262}, {265, 386}, {392, 402}, {424, 466} } +#define AUDIT_ARCH_NATIVE AUDIT_ARCH_S390X +#elif __s390__ +#define NATIVE_SYSCALL_RANGES \ + { {1, 16}, {19, 27}, {29, 30}, {33, 34}, {36, 43}, {45, 52}, \ + {54, 55}, {57, 57}, {60, 67}, {70, 81}, {83, 83}, {85, 97}, \ + {99, 108}, {110, 112}, {114, 122}, {124, 165}, {167, 241}, \ + {243, 262}, {264, 386}, {393, 414}, {416, 466} } +#define AUDIT_ARCH_NATIVE AUDIT_ARCH_S390 +#elif __riscv && __riscv_xlen == 64 +#define NATIVE_SYSCALL_RANGES { {0, 37}, {39, 243}, {258, 294}, {424, 466} } +#define AUDIT_ARCH_NATIVE AUDIT_ARCH_RISCV64 +#elif __riscv && __riscv_xlen == 32 +#define NATIVE_SYSCALL_RANGES \ + { {0, 3}, {5, 37}, {39, 71}, {74, 78}, {81, 85}, {89, 97}, {99, 100}, \ + {102, 107}, {109, 109}, {111, 111}, {116, 126}, {128, 136}, {138, 162}, \ + {165, 168}, {172, 181}, {184, 191}, {193, 242}, {258, 259}, {261, 265}, \ + {267, 291}, {293, 294}, {403, 414}, {416, 466} } +#define AUDIT_ARCH_NATIVE AUDIT_ARCH_RISCV32 +#else +#error cannot determine which AUDIT_ARCH_* value to use for AUDIT_ARCH_NATIVE +#endif + +#else +#ifndef NATIVE_SYSCALL_RANGES +/* Fall back to default-allow if the user specified (with + -DAUDIT_ARCH_NATIVE=...) an arch but not NATIVE_SYSCALL_RANGES */ +#define NATIVE_SYSCALL_RANGES {} +#endif +#endif /* #ifndef AUDIT_ARCH_NATIVE */ + +namespace nix { + +struct Uint32RangeAction { + uint32_t low; /* inclusive */ + uint32_t high; /* inclusive */ + std::vector<struct sock_filter> instructions; +}; + +std::vector<struct sock_filter> rangeActionsToFilter(std::vector<Uint32RangeAction> & ranges); + + +std::vector<struct sock_filter> +seccompMatchu64(std::vector<struct sock_filter> & out, + uint64_t value, + std::vector<struct sock_filter> instructions, + uint32_t offset); +} |