commit 461a9ec198c1e2b9c39a34e21786747dbb8671b4
parent b4cf0b2c84d26aadc22864ed48c6304738d523ea
Author: Jan Pobrislo <ccx@te2000.cz>
Date: Thu, 13 Nov 2025 00:06:54 +0000
ns_pivot_into: add ability to load seccomp BPF programs
Diffstat:
| M | src/ns_pivot_into.c | | | 112 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------ |
1 file changed, 78 insertions(+), 34 deletions(-)
diff --git a/src/ns_pivot_into.c b/src/ns_pivot_into.c
@@ -5,6 +5,7 @@
#include <errno.h>
#include <sys/prctl.h>
#include <sys/capability.h>
+#include <sys/mman.h> /* for mmap() */
#include <sys/mount.h>
#include <sys/syscall.h> /* for SYS_pivot_root */
#include <linux/filter.h> /* for seccomp */
@@ -71,13 +72,6 @@ void pivot_and_umount(
if (!stralloc_catb(&sa_mtp, new_place_for_old_root, oldroot_len)) {
strerr_diefu1sys(111, "store mount path");
}
- /* should be enough for 64bit mtp_n */
- /*
- char env_name[28] = "NS_MTP=\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
- #if SIZE_FMT + 6 > 28
- #error "env_name can't fit >64bit numbers"
- #endif
- */
if (!size_scan(getenv_dienotset("NS_MTP_COUNT"), &mtp_count)) {
strerr_dieinvalid(100, "NS_MTP_COUNT");
}
@@ -166,6 +160,66 @@ void drop(struct drop_privs_s const *const privs) {
+/*
+ * Convert the byte size of a classic-BPF program into its instruction count.
+ * Exits 100 unless nbytes is a non-zero multiple of sizeof(struct sock_filter)
+ * and the count is at most BPF_MAXINSNS, which keeps it within the range of
+ * the `len` member of struct sock_fprog.
+ */
+static unsigned short seccomp_insn_count(unsigned long long nbytes) {
+	unsigned long long const insns = nbytes / sizeof(struct sock_filter);
+	if (!nbytes || nbytes % sizeof(struct sock_filter) || insns > BPF_MAXINSNS) {
+		strerr_dief(100, "invalid size of the seccomp BPF program");
+	}
+	return (unsigned short) insns;
+}
+
+/*
+ * Load a classic-BPF seccomp program from fd and install it as the seccomp
+ * filter of the calling process.
+ *
+ * When fstat() reports a non-zero size the program is size-checked and mapped
+ * read-only; otherwise (st_size == 0, e.g. a pipe) it is read until EOF into
+ * a stralloc. The program memory is not released by this function.
+ *
+ * Does not return on failure: exits 100 on a malformed program size and 111
+ * on system errors.
+ */
void load_seccomp_program(int fd) {
-	strerr_dief(99, "seccomp program loading not yet implemented");
+	struct sock_fprog seccomp_filter;
+	struct stat st;
+	if (fstat(fd, &st) < 0) {
+		strerr_diefu1sys(111, "fstat() the seccomp program fd");
+	}
+	if (st.st_size > 0) {
+		void *addr;
+		/* Check the size first so that an oversized file is never mapped. */
+		seccomp_filter.len = seccomp_insn_count((unsigned long long) st.st_size);
+		addr = mmap(NULL, (size_t) st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+		if (addr == MAP_FAILED) {
+			strerr_diefu1sys(111, "mmap() the seccomp program fd");
+		}
+		seccomp_filter.filter = (struct sock_filter *) addr;
+	} else {
+		stralloc sa = STRALLOC_ZERO;
+		char read_buf[4096];
+		ssize_t read_bytes;
+		/* A read() failing with EINTR leaves read_bytes at -1 and is simply retried. */
+		do {
+			read_bytes = read(fd, read_buf, sizeof(read_buf));
+			if (read_bytes < 0 && errno != EINTR) {
+				strerr_diefu1sys(111, "read() the seccomp program fd");
+			} else if (read_bytes > 0) {
+				if (!stralloc_catb(&sa, read_buf, read_bytes)) {
+					strerr_diefu1sys(111, "store seccomp program");
+				}
+			}
+		} while (read_bytes != 0);
+		seccomp_filter.len = seccomp_insn_count(sa.len);
+		seccomp_filter.filter = (struct sock_filter *) sa.s;
+	}
+	if (syscall_seccomp_set_filter(0, &seccomp_filter)) {
+		strerr_diefu1sys(111, "apply the seccomp BPF program");
+	}
}
int main (int argc, char const *const *argv) {
@@ -177,12 +203,13 @@ int main (int argc, char const *const *argv) {
.gidn = (size_t) -1,
};
char const * chdir_to = 0;
+ int seccomp_fd = -1;
PROG = "ns_pivot_into";
{
subgetopt l = SUBGETOPT_ZERO;
for (;;) {
- int opt = subgetopt_r(argc, argv, "U:C:", &l);
+ int opt = subgetopt_r(argc, argv, "U:S:C:", &l);
if (opt == -1) {
break;
}
@@ -190,33 +217,43 @@ int main (int argc, char const *const *argv) {
case 'C' : /* chdir-to */
chdir_to = l.arg;
break;
- case 'U' : { /* setuid from env */
- drop_privs = 1;
- /*
- The IAB 3-tuple of capability vectors (Inh, Amb and Bound),
- captured in type cap_iab_t combine to pass capabilities
- from one process to another through execve(2) system calls.
- */
- privs.new_iab = cap_iab_from_text(l.arg);
- if (privs.new_iab == NULL) {
- strerr_dieinvalid(100, "capability set");
- }
- if (!uid0_scan(getenv_dienotset("UID"), &privs.uid)) {
- strerr_dieinvalid(100, "UID");
- }
- if (!gid0_scan(getenv_dienotset("GID"), &privs.gid)) {
- strerr_dieinvalid(100, "GID");
- }
- char const *x = getenv_dienotset("GIDLIST");
- if (!gid_scanlist(privs.gids, NGROUPS_MAX + 1, x, &privs.gidn) && *x) {
- strerr_dieinvalid(100, "GIDLIST");
- }
- if (!stralloc_catb(&unex.sa, "UID\0GID\0GIDLIST", 16)) {
- strerr_diefu1sys(111, "store env names");
- }
- unex.count = 3;
- break ;
+ case 'S' : /* FD with seccomp BPF program */
+ if (!int_scan(l.arg, &seccomp_fd)) {
+ strerr_dieinvalid(100, "seccomp BPF program file descriptor");
+ }
+ if (ndelay_off(seccomp_fd)) {
+ strerr_diefu1sys(111, "make seccomp program fd non-blocking");
+ }
+ if (coe(seccomp_fd)) {
+ strerr_diefu1sys(111, "make seccomp program fd close-on-exec");
+ }
+ break;
+ case 'U' : /* setuid from env */
+ drop_privs = 1;
+ /*
+ The IAB 3-tuple of capability vectors (Inh, Amb and Bound),
+ captured in type cap_iab_t combine to pass capabilities
+ from one process to another through execve(2) system calls.
+ */
+ privs.new_iab = cap_iab_from_text(l.arg);
+ if (privs.new_iab == NULL) {
+ strerr_dieinvalid(100, "capability set");
+ }
+ if (!uid0_scan(getenv_dienotset("UID"), &privs.uid)) {
+ strerr_dieinvalid(100, "UID");
+ }
+ if (!gid0_scan(getenv_dienotset("GID"), &privs.gid)) {
+ strerr_dieinvalid(100, "GID");
}
+ char const *x = getenv_dienotset("GIDLIST");
+ if (!gid_scanlist(privs.gids, NGROUPS_MAX + 1, x, &privs.gidn) && *x) {
+ strerr_dieinvalid(100, "GIDLIST");
+ }
+ if (!stralloc_catb(&unex.sa, "UID\0GID\0GIDLIST", 16)) {
+ strerr_diefu1sys(111, "store env names");
+ }
+ unex.count = 3;
+ break ;
default :
dieusage();
}
@@ -230,6 +267,13 @@ int main (int argc, char const *const *argv) {
pivot_and_umount(argv[1], argv[2], &unex);
if (drop_privs) {
drop(&privs);
+ } else if (seccomp_fd >= 0) {
+ if (prctl (PR_SET_NO_NEW_PRIVS, 1L, 0L, 0L, 0L)) {
+ strerr_diefu1sys(111, "enable NO_NEW_PRIVS for seccomp");
+ }
+ }
+ if (seccomp_fd >= 0) {
+ load_seccomp_program(seccomp_fd);
}
if (chdir_to) {
if (chdir(chdir_to) < 0) {