commit c303e1a6123fffdd25e495ab417d7f55ddb19619
parent 1f365a8c08f1be35010b58d935310edfbeb5dce5
Author: Jan Pobrislo <ccx@te2000.cz>
Date: Wed, 12 Nov 2025 21:40:32 +0000
ns_pivot_into utility for sandboxed namespaces
Diffstat:
4 files changed, 315 insertions(+), 1 deletion(-)
diff --git a/src/Makefile b/src/Makefile
@@ -1,7 +1,7 @@
tools_simple:=argv0exec nosuid pidns_run safelink spawn-pty fdsend fdrecv fdrecvto socketpair ptsname mtime_to_uuidv7 ucspi-socksserver ucspi-socksserver-connected
-tools_libcap:=applyuidgid-caps
+tools_libcap:=applyuidgid-caps ns_pivot_into
tools_libs6:=ucspi-socksserver-access
diff --git a/src/ns_mounts_to_env b/src/ns_mounts_to_env
@@ -0,0 +1,44 @@
+#!/bin/sh
+prog=$(awk '
+function el_quote(s) {
+ gsub(/\\/, "\\\\", s); # first double all backslashes
+ gsub(/"/, "\\\"", s); # then escape quote marks
+ return "\"" s "\"" # then surround with quote marks
+}
+
+BEGIN {
+ # mount IDs seem to be unsigned, so lets use -1 to signify not found
+ max_id = -1
+ root_id = -1
+ count = 0
+}
+
+# read in /proc/self/mountinfo
+$5 == "/" { root_id = $1 }
+{
+ max_id = max_id < $1 ? $1 : max_id
+ parents[$1] = $2
+ mountpoints[$1] = $5
+}
+
+function print_umount(mtp){
+ print "NS_MTP_" (++count) "=" el_quote(mtp)
+}
+
+function recursively_umount(mount_id, id) {
+ for(id=max_id; id>=0; id--){
+ if(parents[id] == mount_id){
+ recursively_umount(id)
+ }
+ }
+ print_umount(mountpoints[mount_id])
+}
+
+END{
+ if(root_id == -1) { exit 111 }
+ print "env"
+ recursively_umount(root_id)
+ print "NS_MTP_COUNT=" count
+}
+' /proc/self/mountinfo) || exit $?
+exec execlineb -s0 -c "$prog" "$@"
diff --git a/src/ns_pivot_into.c b/src/ns_pivot_into.c
@@ -0,0 +1,229 @@
+/* ISC license. */
+
+#include <unistd.h>
+#include <grp.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/prctl.h>
+#include <sys/capability.h>
+#include <sys/mount.h>
+#include <sys/syscall.h> /* for SYS_pivot_root */
+#include <linux/securebits.h>
+
+#include <skalibs/types.h>
+#include <skalibs/setgroups.h>
+#include <skalibs/strerr.h>
+#include <skalibs/sgetopt.h>
+#include <skalibs/stralloc.h>
+#include <skalibs/djbunix.h>
+#include <skalibs/exec.h>
+
+#define USAGE "ns_pivot_into [ -U iab_caps ] [ -C chdir ] old-place-for-new-root new-place-for-old-root prog..."
+#define dieusage() strerr_dieusage(100, USAGE)
+
+struct drop_privs_s { /* identity and capability settings applied by drop() */
+ uid_t uid; /* target uid, parsed from $UID; 0 makes drop() skip setuid() */
+ gid_t gid; /* target gid, parsed from $GID; 0 makes drop() skip setgid() */
+ size_t gidn; /* number of entries in gids; (size_t) -1 leaves supplementary groups untouched */
+ gid_t gids[NGROUPS_MAX + 1]; /* supplementary group list, parsed from $GIDLIST */
+ cap_iab_t new_iab; /* IAB capability tuple parsed from the -U argument; only set when -U is given */
+};
+
+struct unexport_s { /* list of environment variable names handed to xmexec_n() in main() */
+ size_t count; /* number of names stored in sa */
+ stralloc sa; /* the names, concatenated, each followed by a NUL byte */
+};
+
+/*
+ * Apply the privilege settings in *privs to the current process.
+ *
+ * Steps, in this order: lock down securebits, install the IAB capability
+ * tuple, set the supplementary groups, set the primary gid, set the uid.
+ * A gid or uid of 0 skips the corresponding setgid()/setuid() call and a
+ * gidn of (size_t) -1 skips the supplementary group change.
+ * Any failure terminates the program with exit code 111.
+ */
+void drop(struct drop_privs_s const *const privs) {
+  /* make sure privileges are dropped permanently */
+  if (prctl(PR_SET_SECUREBITS,
+      SECBIT_KEEP_CAPS | /* technically unneeded as NO_SETUID_FIXUP is superset */
+      SECBIT_NO_SETUID_FIXUP |
+      SECBIT_NOROOT | /* disables suid and filecap privilege gain */
+      SECBIT_NOROOT_LOCKED) < 0) {
+    strerr_dief1sys(111, "Failed to set securebits via prctl()");
+  }
+
+  /* set these capabilities for the current process */
+  if (cap_iab_set_proc(privs->new_iab) != 0) {
+    /* name the call that actually failed */
+    strerr_dief1sys(111, "Failed to set capabilities via cap_iab_set_proc()");
+  }
+
+  /* ancillary groups; falls back to the current egid when no gid was given */
+  if (privs->gidn != (size_t) -1
+      && setgroups_and_gid(privs->gid ? privs->gid : getegid(), privs->gidn,
+        privs->gids) < 0) {
+    strerr_diefu1sys(111, "set supplementary group list");
+  }
+
+  /* primary group */
+  if (privs->gid && setgid(privs->gid) < 0) {
+    strerr_diefu1sys(111, "setgid");
+  }
+
+  /* set userid */
+  if (privs->uid && setuid(privs->uid) < 0) {
+    strerr_diefu1sys(111, "setuid");
+  }
+}
+
+/*
+ * Look up the environment variable `name` and return its value.
+ * When the variable is not set, strerr_dienotset() is called with
+ * exit code 100.
+ */
+char const *const getenv_dienotset(char const *const name) {
+  char const *value = getenv(name);
+  if (value) {
+    return value;
+  }
+  strerr_dienotset(100, name);
+  return value;
+}
+
+/*
+ * Append the value of environment variable `env_name`, including its
+ * terminating NUL byte, to *sa.  The lookup goes through
+ * getenv_dienotset(), so an unset variable is handled there.
+ * Returns the result of stralloc_catb().
+ */
+int stralloc_catenv0(stralloc *sa, char const * env_name) {
+  char const *const env_value = getenv_dienotset(env_name);
+  size_t const with_nul = strlen(env_value) + 1;
+  return stralloc_catb(sa, env_value, with_nul);
+}
+
+/*
+ * Thin wrapper around the pivot_root system call, invoked through
+ * syscall() with SYS_pivot_root.
+ * Returns the syscall() result: 0 on success, -1 with errno set on failure.
+ */
+int pivot_root(char const *new_root, char const *put_old) {
+  /* pass the path pointers themselves, not the first character of each path */
+  return syscall(SYS_pivot_root, new_root, put_old);
+}
+
+/*
+ * Pivot the root filesystem and unmount everything that belonged to the
+ * old root.
+ *
+ * old_place_for_new_root: directory (in the current root) that becomes "/".
+ * new_place_for_old_root: where the old root is put by pivot_root(); it is
+ *   also used verbatim as the prefix of every path that gets unmounted
+ *   afterwards (NOTE(review): so it presumably has to resolve to the same
+ *   place before and after the pivot, e.g. a path relative to the new root).
+ * unex: every NS_MTP_* variable name consumed here is appended (NUL
+ *   terminated) to unex->sa and counted in unex->count.
+ *
+ * Environment read: NS_MTP_COUNT=<n> and NS_MTP_1 .. NS_MTP_<n>, each an
+ * absolute mount point; they are unmounted in index order.
+ * Exits 100 on missing/invalid variables, 111 on system call failure.
+ */
+void pivot_and_umount(
+  char const *const old_place_for_new_root,
+  char const *const new_place_for_old_root,
+  struct unexport_s *const unex
+) {
+  size_t const oldroot_len = strlen(new_place_for_old_root);
+  size_t mtp_count;
+  size_t mtp_n;
+  stralloc sa_mtp = STRALLOC_ZERO;
+  /* "NS_MTP_" (7 bytes) + decimal digits of a size_t + terminating NUL */
+  char env_name[7 + SIZE_FMT + 1] = "NS_MTP_";
+
+  if (!stralloc_catb(&sa_mtp, new_place_for_old_root, oldroot_len)) {
+    strerr_diefu1sys(111, "store mount path");
+  }
+  if (!size_scan(getenv_dienotset("NS_MTP_COUNT"), &mtp_count)) {
+    strerr_dieinvalid(100, "NS_MTP_COUNT");
+  }
+  /* 13 = strlen("NS_MTP_COUNT") + 1: names are stored NUL-terminated */
+  if (!stralloc_catb(&unex->sa, "NS_MTP_COUNT", 13)) {
+    strerr_diefu1sys(111, "store env name");
+  }
+  unex->count++;
+
+  /* pivot_root */
+  if (chdir(old_place_for_new_root) < 0) {
+    strerr_diefu2sys(111, "chdir to ", old_place_for_new_root);
+  }
+  if (pivot_root(".", new_place_for_old_root) < 0) {
+    strerr_diefu1sys(111, "pivot_root");
+  }
+  if (chroot(".") < 0) {
+    strerr_diefu1sys(111, "chroot");
+  }
+
+  /* umount oldroot */
+  for (mtp_n = 1; mtp_n <= mtp_count; mtp_n++) {
+    /* build "NS_MTP_<n>", matching the names emitted by the mountinfo script */
+    size_t const env_name_len = 7 + size_fmt(&env_name[7], mtp_n);
+    env_name[env_name_len] = 0;
+
+    /* +1 keeps the terminating NUL so consecutive names stay separated */
+    if (!stralloc_catb(&unex->sa, env_name, env_name_len + 1)) {
+      strerr_diefu1sys(111, "store env name");
+    }
+    unex->count++;
+
+    /* rewind to the old-root prefix, then append the mount point + NUL;
+       stralloc_catenv0() takes the variable NAME and does the lookup itself */
+    sa_mtp.len = oldroot_len;
+    if (!stralloc_catenv0(&sa_mtp, env_name)) {
+      strerr_diefu1sys(111, "store mount path");
+    }
+    if (sa_mtp.s[oldroot_len] != '/') {
+      strerr_dieinvalid(100, env_name);
+    }
+
+    /* umount() reports failure as -1 with the reason in errno */
+    if (umount(sa_mtp.s) < 0) {
+      if (errno != EBUSY) {
+        strerr_diefu2sys(111, "umount ", sa_mtp.s);
+      }
+      /* still in use: fall back to a lazy detach */
+      if (umount2(sa_mtp.s, MNT_DETACH) < 0) {
+        strerr_diefu2sys(111, "detach mountpoint ", sa_mtp.s);
+      }
+    }
+  }
+}
+
+/*
+ * ns_pivot_into [ -U iab_caps ] [ -C chdir ] old-place-for-new-root
+ *               new-place-for-old-root prog...
+ *
+ * -U iab_caps: after the pivot, drop privileges: apply the given IAB
+ *              capability text and switch to the ids found in $UID, $GID
+ *              and $GIDLIST (those three names are then handed to
+ *              xmexec_n() together with the NS_MTP_* names).
+ * -C chdir:    chdir() there after pivoting (and after dropping privileges).
+ *
+ * Pivots the root, unmounts the old root, then execs into prog...
+ * Exits 100 on usage/invalid input, 111 on system failure.
+ */
+int main (int argc, char const *const *argv) {
+  struct unexport_s unex = { 0, STRALLOC_ZERO };
+  int drop_privs = 0;
+  struct drop_privs_s privs = {
+    .uid = 0,
+    .gid = 0,
+    .gidn = (size_t) -1,
+  };
+  char const * chdir_to = 0;
+
+  PROG = "ns_pivot_into";
+  {
+    subgetopt l = SUBGETOPT_ZERO;
+    for (;;) {
+      int opt = subgetopt_r(argc, argv, "U:C:", &l);
+      if (opt == -1) {
+        break;
+      }
+      switch (opt) {
+        case 'C' : /* chdir-to */
+          chdir_to = l.arg;
+          break;
+        case 'U' : { /* setuid from env */
+          /*
+           The IAB 3-tuple of capability vectors (Inh, Amb and Bound),
+           captured in type cap_iab_t combine to pass capabilities
+           from one process to another through execve(2) system calls.
+          */
+          privs.new_iab = cap_iab_from_text(l.arg);
+          if (privs.new_iab == NULL) {
+            strerr_dieinvalid(100, "capability set");
+          }
+          if (!uid0_scan(getenv_dienotset("UID"), &privs.uid)) {
+            strerr_dieinvalid(100, "UID");
+          }
+          if (!gid0_scan(getenv_dienotset("GID"), &privs.gid)) {
+            strerr_dieinvalid(100, "GID");
+          }
+          char const *x = getenv_dienotset("GIDLIST");
+          /* an empty GIDLIST is accepted: it scans as zero groups */
+          if (!gid_scanlist(privs.gids, NGROUPS_MAX + 1, x, &privs.gidn) && *x) {
+            strerr_dieinvalid(100, "GIDLIST");
+          }
+          if (!drop_privs) {
+            /* record the three names once, even when -U is repeated, so the
+               stored names and the count cannot get out of step */
+            if (!stralloc_catb(&unex.sa, "UID\0GID\0GIDLIST", 16)) {
+              strerr_diefu1sys(111, "store env names");
+            }
+            unex.count += 3;
+          }
+          drop_privs = 1;
+          break ;
+        }
+        default :
+          dieusage();
+      }
+    }
+    argc -= l.ind ;
+    argv += l.ind ;
+  }
+  /* after the shift argv[0] is the first positional argument:
+     argv[0] = old-place-for-new-root, argv[1] = new-place-for-old-root,
+     argv[2].. = prog and its arguments */
+  if (argc < 3) {
+    dieusage();
+  }
+  pivot_and_umount(argv[0], argv[1], &unex);
+  if (drop_privs) {
+    drop(&privs);
+  }
+  if (chdir_to) {
+    if (chdir(chdir_to) < 0) {
+      strerr_diefu2sys(111, "chdir to ", chdir_to);
+    }
+  }
+  xmexec_n(&argv[2], unex.sa.s, unex.sa.len, unex.count);
+}
diff --git a/src/ns_umount_env.awk b/src/ns_umount_env.awk
@@ -0,0 +1,41 @@
+#!/usr/bin/awk -f
+# Quote s as an execline double-quoted word: backslashes and double quotes
+# are backslash-escaped and the result is wrapped in double quotes.
+function el_quote(s) {
+  # "\\\\&" reaches gsub() as \\& = a literal backslash followed by the
+  # matched text.  A bare "\\\\" is emitted as a single backslash by awks
+  # that follow the POSIX replacement rules (e.g. gawk --posix), which would
+  # leave backslashes undoubled.
+  gsub(/\\/, "\\\\&", s)
+  # escape quote marks
+  gsub(/"/, "\\\"", s)
+  # surround with quote marks
+  return "\"" s "\""
+}
+
+BEGIN {
+  # Mount IDs appear to be non-negative, so -1 serves as the "not found" marker.
+  count = 0
+  root_id = -1
+  max_id = -1
+}
+
+# Every input record is one line of /proc/self/mountinfo:
+# $1 = mount ID, $2 = parent mount ID, $5 = mount point.
+{
+  if ($5 == "/") {
+    root_id = $1
+  }
+  if (max_id < $1) {
+    max_id = $1
+  }
+  parents[$1] = $2
+  mountpoints[$1] = $5
+}
+
+# Emit the next numbered assignment, NS_MTP_<n>=<quoted mount point>,
+# bumping the global counter first so numbering starts at 1.
+function print_umount(mtp) {
+  count++
+  print "NS_MTP_" count "=" el_quote(mtp)
+}
+
+# Emit every mount that has mount_id as an ancestor (highest mount ID
+# first), then mount_id itself, so children are always listed before their
+# parent.  `id` is a local variable, not a real parameter.
+function recursively_umount(mount_id, id) {
+  for (id = max_id; id >= 0; id--) {
+    # Skip a mount that names itself as parent (mountinfo does this for the
+    # root of the mount tree); following it would recurse forever.
+    # The "in" test avoids creating empty array entries for unused IDs.
+    if (id != mount_id && (id in parents) && parents[id] == mount_id) {
+      recursively_umount(id)
+    }
+  }
+  print_umount(mountpoints[mount_id])
+}
+
+END{ # runs after all of the input has been read
+ if(root_id == -1) { exit 111 } # no record had "/" as its mount point
+ print "env" # first word of the output; the assignments follow it
+ recursively_umount(root_id) # prints one NS_MTP_<n>=... line per mount, the root mount last
+ print "NS_MTP_COUNT=" count # number of NS_MTP_<n> lines printed above
+}