lnstools

Linux namespace tools
git clone https://ccx.te2000.cz/git/lnstools
Log | Files | Refs

commit d2b3a9dd8b7df741b6a5b9ba3ed025bf16b9ad4e
parent 40784be9f20d5d9695debac30a044070d53f83d9
Author: Jan Pobrislo <ccx@te2000.cz>
Date:   Thu, 13 Nov 2025 20:16:38 +0000

Import utility sources from other projects

Diffstat:
MMakefile | 2+-
Mscripts/link | 2+-
Ascripts/style | 21+++++++++++++++++++++
Msimplelink.mk | 4++--
Dsrc/hello_main.c | 10----------
Asrc/lns-applyuidgid_main.c | 97+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/lns-envuidgid_main.c | 180+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/lns-lockdown_main.c | 284+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/lns-mounts-to-env | 44++++++++++++++++++++++++++++++++++++++++++++
Asrc/lns-pidns_main.c | 110+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 files changed, 740 insertions(+), 14 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,7 +1,7 @@ all: all_executables .PHONY: all -executables:=hello +executables:=lns-lockdown lns-envuidgid lns-applyuidgid lns-pidns include simplelink.mk clean: diff --git a/scripts/link b/scripts/link @@ -1,2 +1,2 @@ #!/bin/sh -xe -exec gcc -static "$@" +exec gcc -static "$@" -lskarnet -lcap diff --git a/scripts/style b/scripts/style @@ -0,0 +1,21 @@ +#!/bin/sh +exec astyle \ + --style=attach \ + --indent=spaces=4 \ + --max-code-length=88 \ + --attach-closing-while \ + --attach-return-type \ + --break-one-line-headers \ + --add-braces \ + --pad-oper \ + --pad-comma \ + --pad-include \ + --pad-header \ + --unpad-paren \ + --unpad-brackets \ + --add-braces \ + --indent-switches \ + --indent-cases \ + --indent-col1-comments \ + "$@" + diff --git a/simplelink.mk b/simplelink.mk @@ -7,8 +7,8 @@ all_executables: $(patsubst %,$(BUILD_DIR)/%,$(executables)) define simplelink = include $$(BUILD_DIR)/$(1)_main.c.deps.mk -$$(BUILD_DIR)/$(1): $$(LINKDEP_$(1)_main__c) $$(SCRIPTS_DIR)/link $$(BUILD_DIR)/$(1)_main.c.deps.mk - $$(SCRIPTS_DIR)/link -o '$$@' $$(LINKDEP_$(1)_main__c) +$$(BUILD_DIR)/$(1): $$(LINKDEP_$(subst -,__,$(1))_main__c) $$(SCRIPTS_DIR)/link $$(BUILD_DIR)/$(1)_main.c.deps.mk + $$(SCRIPTS_DIR)/link -o '$$@' $$(LINKDEP_$(subst -,__,$(1))_main__c) endef $(foreach var,$(executables),$(eval $(call simplelink,$(var)))) diff --git a/src/hello_main.c b/src/hello_main.c @@ -1,10 +0,0 @@ -#include <stdio.h> - -int main(int argc, char const *const *argv) -{ - if(printf("Hello %s!\n", "world") < 0) { - perror("error printing string"); - return 111; - } - return 0; -} diff --git a/src/lns-applyuidgid_main.c b/src/lns-applyuidgid_main.c @@ -0,0 +1,97 @@ +/* ISC license. 
*/
+
+#include <unistd.h>
+#include <grp.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <sys/prctl.h>
+#include <sys/capability.h>
+#include <linux/securebits.h>
+
+#include <skalibs/types.h>
+#include <skalibs/setgroups.h>
+#include <skalibs/strerr.h>
+#include <skalibs/sgetopt.h>
+#include <skalibs/djbunix.h>
+#include <skalibs/exec.h>
+
+#define USAGE "lns-applyuidgid [ -z ] [ -u uid ] [ -g gid ] [ -G gidlist ] [ -U ] iab_caps prog..."
+#define dieusage() strerr_dieusage(100, USAGE)
+/* Set securebits, apply the capabilities in iab_caps, switch gids/gid/uid, then exec prog. */
+int main (int argc, char const *const *argv)
+{
+  uid_t uid = 0 ;
+  gid_t gid = 0 ;
+  gid_t gids[NGROUPS_MAX+1] ;
+  size_t gidn = (size_t)-1 ; /* (size_t)-1 means: do not touch the supplementary groups */
+  int unexport = 0 ; /* -z: pass UID/GID/GIDLIST to xmexec_n() (presumably to unset them) */
+  PROG = "lns-applyuidgid" ;
+  {
+    subgetopt l = SUBGETOPT_ZERO ;
+    for (;;)
+    {
+      int opt = subgetopt_r(argc, argv, "zUu:g:G:", &l) ;
+      if (opt == -1) break ;
+      switch (opt)
+      {
+        case 'z' : unexport = 1 ; break ;
+        case 'u' : if (!uid0_scan(l.arg, &uid)) dieusage() ; break ;
+        case 'g' : if (!gid0_scan(l.arg, &gid)) dieusage() ; break ;
+        case 'G' : if (!gid_scanlist(gids, NGROUPS_MAX+1, l.arg, &gidn) && *l.arg) dieusage() ; break ;
+        case 'U' :
+        {
+          char const *x = getenv("UID") ;
+          if (!x) strerr_dienotset(100, "UID") ;
+          if (!uid0_scan(x, &uid)) strerr_dieinvalid(100, "UID") ;
+          x = getenv("GID") ;
+          if (!x) strerr_dienotset(100, "GID") ;
+          if (!gid0_scan(x, &gid)) strerr_dieinvalid(100, "GID") ;
+          x = getenv("GIDLIST") ;
+          if (!x) strerr_dienotset(100, "GIDLIST") ;
+          if (!gid_scanlist(gids, NGROUPS_MAX+1, x, &gidn) && *x)
+            strerr_dieinvalid(100, "GIDLIST") ;
+          break ;
+        }
+        default : dieusage() ;
+      }
+    }
+    argc -= l.ind ; argv += l.ind ;
+  }
+  if (argc < 2) dieusage() ;
+
+  /*
+     The IAB 3-tuple of capability vectors (Inh, Amb and Bound),
+     captured in type cap_iab_t combine to pass capabilities
+     from one process to another through execve(2) system calls.
+  */
+
+  /* parse the first argument to obtain a set of capabilities */
+  cap_iab_t new_iab = cap_iab_from_text(argv[0]) ;
+  if (new_iab == NULL) {
+    strerr_dieinvalid(100, "caps") ;
+  }
+
+  /* Securebits go first: with NO_SETUID_FIXUP in place the kernel no longer
+     adjusts the capability sets when setuid() below leaves uid 0. */
+  if (prctl(PR_SET_SECUREBITS,
+        SECBIT_KEEP_CAPS | /* unneeded as NO_SETUID_FIXUP is superset */
+        SECBIT_NO_SETUID_FIXUP |
+        SECBIT_NOROOT | /* disables suid and filecap privilege gain */
+        SECBIT_NOROOT_LOCKED) < 0) {
+    strerr_dief1sys(111, "Failed to set securebits via prctl()");
+  }
+  /* set these capabilities for the current process */
+  if (cap_iab_set_proc(new_iab) != 0) {
+    strerr_dief1sys(111, "Failed to set capabilities via cap_iab_set_proc()");
+  }
+
+  if (gidn != (size_t)-1 && setgroups_and_gid(gid ? gid : getegid(), gidn, gids) < 0)
+    strerr_diefu1sys(111, "set supplementary group list") ;
+  if (gid && setgid(gid) < 0)
+    strerr_diefu1sys(111, "setgid") ;
+  if (uid && setuid(uid) < 0)
+    strerr_diefu1sys(111, "setuid") ;
+  /* argv[0] is iab_caps, so the command line to run starts at argv[1] in both branches */
+  if (unexport) xmexec_n(&argv[1], "UID\0GID\0GIDLIST", 16, 3) ;
+  else xexec(&argv[1]) ;
+}
diff --git a/src/lns-envuidgid_main.c b/src/lns-envuidgid_main.c
@@ -0,0 +1,180 @@
+/* Copy of s6-envuidgid.c from https://skarnet.org/software/s6 */
+/* ISC license. */
+
+#include <string.h>
+#include <pwd.h>
+#include <grp.h>
+#include <errno.h>
+#include <limits.h>
+
+#include <skalibs/types.h>
+#include <skalibs/sgetopt.h>
+#include <skalibs/strerr.h>
+#include <skalibs/djbunix.h>
+#include <skalibs/exec.h>
+
+#define USAGE "lns-envuidgid [ -i | -D defaultuid:defaultgid:defaultgidlist ] [ -u | -g | -B ] [ -n ] account prog..."
+#define dieusage() strerr_dieusage(100, USAGE)
+/* Parse "uid:gid:gidlist"; empty or absent fields become 0 / no groups. Returns 0 if s is malformed. */
+static inline size_t scan_defaults (char const *s, uid_t *uid, gid_t *gid, size_t *n, gid_t *tab)
+{
+  size_t pos = uid_scan(s, uid) ;
+  if (!pos)
+  {
+    if (*s != ':') return 0 ;
+    *uid = 0 ;
+  }
+  s += pos ;
+  if (!*s) goto zgid ;
+  if (*s++ != ':') return 0 ;
+  if (!*s) goto zgid ;
+  pos = gid_scan(s, gid) ;
+  if (!pos)
+  {
+    if (*s != ':') return 0 ;
+    *gid = 0 ;
+  }
+  s += pos ;
+  if (!*s) goto zn ;
+  if (*s++ != ':') return 0 ;
+  if (!*s) goto zn ;
+  return gid_scanlist(tab, NGROUPS_MAX, s, n) ;
+
+ zgid:
+  *gid = 0 ;
+ zn:
+  *n = 0 ;
+  return 1 ;
+}
+/* Store in tab (at most max entries) the gid of each group listing name as a member; -1 if errno is set. */
+static int prot_readgroups (char const *name, gid_t *tab, unsigned int max)
+{
+  unsigned int n = 0 ;
+  for (;;)
+  {
+    struct group *gr ;
+    char **member ;
+    errno = 0 ; /* reset each round so that a null getgrent() with errno still 0 means end of database */
+    if (n >= max) break ;
+    gr = getgrent() ;
+    if (!gr) break ;
+    for (member = gr->gr_mem ; *member ; member++)
+      if (!strcmp(name, *member)) break ;
+    if (*member) tab[n++] = gr->gr_gid ;
+  }
+  endgrent() ;
+  return errno ? -1 : n ;
+}
+/* Resolve account (user, group or user:group) to ids and exec prog through xmexec_m() with UID/GID/GIDLIST. */
+int main (int argc, char *const *argv)
+{
+  char const *user = 0 ;
+  char const *group = 0 ;
+  unsigned int what = 7 ; /* bit 0: emit UID, bit 1: emit GID, bit 2: emit GIDLIST */
+  int numfallback = 0 ;
+  int insist = 1 ;
+  uid_t uid = 0 ; /* zeroed: a value that is emitted without having been resolved must not be garbage */
+  gid_t gid = 0 ; /* e.g. "-B user" without ":group" still formats GID */
+  size_t n = 0 ;
+  gid_t tab[NGROUPS_MAX] ;
+  PROG = "lns-envuidgid" ;
+  {
+    subgetopt l = SUBGETOPT_ZERO ;
+    for (;;)
+    {
+      int opt = subgetopt_r(argc, (char const *const *)argv, "ugBniD:", &l) ;
+      if (opt == -1) break ;
+      switch (opt)
+      {
+        case 'u' : what = 1 ; break ;
+        case 'g' : what = 2 ; break ;
+        case 'B' : what = 3 ; break ;
+        case 'n' : what &= 3 ; numfallback = 1 ; break ;
+        case 'i' : insist = 1 ; break ;
+        case 'D' :
+          if (!scan_defaults(l.arg, &uid, &gid, &n, tab)) dieusage() ;
+          insist = 0 ;
+          break ;
+        default : dieusage() ;
+      }
+    }
+    argc -= l.ind ; argv += l.ind ;
+  }
+  if (argc < 2) dieusage() ;
+
+  switch (what)
+  {
+    case 7 : /* account */
+    case 1 : /* user */
+      user = argv[0] ;
+      break ;
+    case 2 : /* group */
+      group = argv[0] ;
+      break ;
+    case 3 : /* both */
+    {
+      char *colon = strchr(argv[0], ':') ;
+      user = argv[0] ;
+      if (colon)
+      {
+        *colon = 0 ; /* split argv[0] in place into "user" and "group" */
+        group = colon + 1 ;
+        if (colon == argv[0]) user = 0 ;
+        if (!group[0]) group = 0 ;
+      }
+      break ;
+    }
+    default : strerr_dief1x(101, "inconsistent option management - please submit a bug-report") ;
+  }
+
+  if (group)
+  {
+    struct group *gr = getgrnam(group) ;
+    if (gr) gid = gr->gr_gid ;
+    else if (numfallback && gid_scan(group, &gid)) ;
+    else if (insist) strerr_dief2x(1, "unknown group: ", group) ;
+  }
+
+  if (user)
+  {
+    struct passwd *pw = getpwnam(user) ;
+    if (pw)
+    {
+      uid = pw->pw_uid ;
+      if (what == 7)
+      {
+        int r = prot_readgroups(argv[0], tab, NGROUPS_MAX) ;
+        if (r < 0)
+          strerr_diefu2sys(111, "get supplementary groups for ", argv[0]) ;
+        n = r ;
+        gid = pw->pw_gid ;
+      }
+    }
+    else if (numfallback && uid_scan(user, &uid)) ;
+    else if (insist) strerr_dief2x(1, "unknown user: ", user) ;
+  }
+
+  {
+    size_t pos = 0 ;
+    char fmt[19 + UID_FMT + (NGROUPS_MAX+1) * GID_FMT] ;
+    if (what & 1)
+    {
+      memcpy(fmt + pos, "UID=", 4) ; pos += 4 ;
+      pos += uid_fmt(fmt + pos, uid) ;
+      fmt[pos++] = 0 ;
+    }
+    if (what & 2)
+    {
+      memcpy(fmt + pos, "GID=", 4) ; pos += 4 ;
+      pos += gid_fmt(fmt + pos, gid) ;
+      fmt[pos++] = 0 ;
+    }
+    if (what & 4)
+    {
+      memcpy(fmt + pos, "GIDLIST=", 8) ; pos += 8 ;
+      pos += gid_fmtlist(fmt + pos, tab, n) ;
+      fmt[pos++] = 0 ;
+    }
+    xmexec_m((char const *const *)argv + 1, fmt, pos) ;
+  }
+}
diff --git a/src/lns-lockdown_main.c b/src/lns-lockdown_main.c
@@ -0,0 +1,284 @@
+#include <unistd.h>
+#include <grp.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/prctl.h>
+#include <sys/capability.h>
+#include <sys/mman.h> /* for mmap() */
+#include <sys/mount.h>
+#include <sys/syscall.h> /* for SYS_pivot_root */
+#include <linux/filter.h> /* for seccomp */
+#include <linux/seccomp.h> /* for seccomp */
+#include <linux/securebits.h> /* for prctl(PR_SET_SECUREBITS, ...) */
+
+#include <skalibs/types.h>
+#include <skalibs/setgroups.h>
+#include <skalibs/strerr.h>
+#include <skalibs/sgetopt.h>
+#include <skalibs/stralloc.h>
+#include <skalibs/djbunix.h>
+#include <skalibs/exec.h>
+
+#define USAGE "lns-lockdown [ -U iab_caps ] [ -S seccomp_bpf_fd ] [ -C chdir ] old-place-for-new-root new-place-for-old-root prog..."
+#define dieusage() strerr_dieusage(100, USAGE)
+
+/*** data structures ***/
+struct drop_privs_s {
+    uid_t uid;                    /* 0 = drop() leaves the uid alone */
+    gid_t gid;                    /* 0 = drop() leaves the gid alone */
+    size_t gidn;                  /* (size_t) -1 = leave supplementary groups alone */
+    gid_t gids[NGROUPS_MAX + 1];
+    cap_iab_t new_iab;
+};
+/* NUL-terminated environment variable names, handed to xmexec_n() together with the command line */
+struct unexport_s {
+    size_t count;                 /* number of names stored in sa */
+    stralloc sa;
+};
+
+/*** syscall wrappers (thin syscall(2) calls: -1 with errno set on failure) ***/
+int syscall_pivot_root(char const *new_root, char const *put_old) {
+    return syscall(SYS_pivot_root, new_root, put_old); /* pass the paths, not their first bytes */
+}
+int syscall_seccomp_set_filter(unsigned int flags, void *args) {
+    return syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, flags, args);
+}
+
+/*** utility functions ***/
+char const *const getenv_dienotset(char const *const name) {
+    char const *x = getenv(name);
+    if (!x) {                     /* unset variable: reported through strerr_dienotset(100, ...) */
+        strerr_dienotset(100, name);
+    }
+    return x;
+}
+/* Append the value of variable env_name, terminating NUL included, to sa; returns stralloc_catb() result. */
+int stralloc_catenv0(stralloc *sa, char const * env_name) {
+    char const *const value = getenv_dienotset(env_name);
+    return stralloc_catb(sa, value, strlen(value) + 1);
+}
+/*** individual steps of dropping into sandbox ***/
+/* chdir + pivot_root + chroot into the new root, then unmount each NS_MTP_<n> path under the old root. */
+void pivot_and_umount(
+    char const *const old_place_for_new_root,
+    char const *const new_place_for_old_root,
+    struct unexport_s *const unex
+) {
+    size_t mtp_count, mtp_n;
+    size_t oldroot_len = strlen(new_place_for_old_root);
+    /* sa_mtp = new_place_for_old_root followed by the mountpoint being unmounted */
+    stralloc sa_mtp = STRALLOC_ZERO;
+    if (!stralloc_catb(&sa_mtp, new_place_for_old_root, oldroot_len)) {
+        strerr_diefu1sys(111, "store mount path");
+    }
+    if (!size_scan(getenv_dienotset("NS_MTP_COUNT"), &mtp_count)) {
+        strerr_dieinvalid(100, "NS_MTP_COUNT");
+    }
+    if (!stralloc_catb(&unex->sa, "NS_MTP_COUNT", 13)) {
+        strerr_diefu1sys(111, "store env name");
+    }
+    unex->count++;
+
+    /* pivot_root */
+    if (chdir(old_place_for_new_root) < 0) {
+        strerr_diefu2sys(111, "chdir to ", old_place_for_new_root) ;
+    }
+    if (syscall_pivot_root(".", new_place_for_old_root) < 0) {
+        strerr_diefu1sys(111, "pivot_root") ;
+    }
+    if (chroot(".") < 0) {
+        strerr_diefu1sys(111, "chroot") ;
+    }
+
+    /* umount oldroot; names as printed by lns-mounts-to-env: NS_MTP_1 .. NS_MTP_<count> */
+    char env_name[7 + SIZE_FMT + 1] = "NS_MTP_";
+    size_t env_name_len;
+    for (mtp_n = 1; mtp_n <= mtp_count; mtp_n++) {
+        env_name_len = 7 + size_fmt(&env_name[7], mtp_n);
+        env_name[env_name_len] = 0;
+        /* names in unex->sa are NUL-separated, so the terminator is stored as well */
+        if (!stralloc_catb(&unex->sa, env_name, env_name_len + 1)) {
+            strerr_diefu1sys(111, "store env name");
+        }
+        unex->count++;
+
+        sa_mtp.len = oldroot_len; /* drop the previous mountpoint, keep the prefix */
+        if (!stralloc_catenv0(&sa_mtp, env_name)) {
+            strerr_diefu1sys(111, "store mount path");
+        }
+        if (sa_mtp.s[oldroot_len] != '/') {
+            strerr_dieinvalid(100, env_name);
+        }
+        /*
+         * umount() returns -1 and reports the reason in errno, so errno
+         * (not the return value) tells whether the mountpoint was busy.
+         */
+        if (umount(sa_mtp.s) < 0) {
+            if (errno != EBUSY) {
+                strerr_diefu2sys(111, "umount ", sa_mtp.s) ;
+            }
+            if (umount2(sa_mtp.s, MNT_DETACH) < 0) { /* busy: detach lazily */
+                strerr_diefu2sys(111, "detach mountpoint ", sa_mtp.s) ;
+            }
+        }
+    }
+}
+/* Set securebits, apply privs->new_iab, then switch supplementary groups, gid and uid. */
+void drop(struct drop_privs_s const *const privs) {
+    /* make sure privileges are dropped permanently */
+    if (prctl(PR_SET_SECUREBITS,
+              SECBIT_KEEP_CAPS | /* technically unneeded as NO_SETUID_FIXUP is superset */
+              SECBIT_NO_SETUID_FIXUP |
+              SECBIT_NOROOT | /* disables suid and filecap privilege gain */
+              SECBIT_NOROOT_LOCKED) < 0) {
+        strerr_dief1sys(111, "Failed to set securebits via prctl()");
+    }
+
+    /* set these capabilities for the current process */
+    if (cap_iab_set_proc(privs->new_iab) != 0) {
+        strerr_dief1sys(111, "Failed to set capabilities via cap_iab_set_proc()");
+    }
+
+    /* ancillary groups */
+    if (privs->gidn != (size_t) -1
+            && setgroups_and_gid(privs->gid ? privs->gid : getegid(), privs->gidn,
+                                 privs->gids) < 0) {
+        strerr_diefu1sys(111, "set supplementary group list");
+    }
+
+    /* primary group */
+    if (privs->gid && setgid(privs->gid) < 0) {
+        strerr_diefu1sys(111, "setgid");
+    }
+
+    /* set userid */
+    if (privs->uid && setuid(privs->uid) < 0) {
+        strerr_diefu1sys(111, "setuid");
+    }
+}
+/* Meant to install the BPF program readable from fd as a seccomp filter; for now it bails out first. */
+void load_seccomp_program(int fd) {
+    strerr_dief(99, "seccomp program loading not yet implemented");
+    struct sock_fprog seccomp_filter;
+    struct stat st;
+    if (fstat (fd, &st) < 0) {
+        strerr_diefu1sys(111, "fstat() the seccomp program fd");
+    }
+    if (st.st_size > 0) {
+        void * addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+        if (addr == MAP_FAILED) {
+            strerr_diefu1sys(111, "mmap() the seccomp program fd");
+        }
+        seccomp_filter.len = st.st_size / sizeof(struct sock_filter);
+        seccomp_filter.filter = (struct sock_filter *) addr;
+    } else {
+        stralloc sa = STRALLOC_ZERO;
+        char read_buf[4096];
+        ssize_t read_bytes;
+        do {
+            read_bytes = read (fd, read_buf, sizeof(read_buf));
+            if (read_bytes < 0 && errno != EINTR) {
+                strerr_diefu1sys(111, "read() the seccomp program fd");
+            } else if (read_bytes > 0) {
+                if (!stralloc_catb(&sa, read_buf, read_bytes)) {
+                    strerr_diefu1sys(111, "store seccomp program");
+                }
+            }
+        } while (read_bytes != 0);
+        seccomp_filter.len = sa.len / sizeof(struct sock_filter);
+        seccomp_filter.filter = (struct sock_filter *) sa.s;
+    }
+    if (syscall_seccomp_set_filter(0, &seccomp_filter)) {
+        strerr_diefu1sys(111, "apply the seccomp BPF program");
+    }
+}
+/* Parse options, pivot into the new root, optionally drop privileges / load seccomp, then exec prog. */
+int main (int argc, char const *const *argv) {
+    struct unexport_s unex = { 0, STRALLOC_ZERO };
+    int drop_privs = 0;
+    struct drop_privs_s privs = {
+        .uid = 0,
+        .gid = 0,
+        .gidn = (size_t) -1,
+    };
+    char const * chdir_to = 0;
+    int seccomp_fd = -1;
+
+    PROG = "lns-lockdown";
+    {
+        subgetopt l = SUBGETOPT_ZERO;
+        for (;;) {
+            int opt = subgetopt_r(argc, argv, "U:S:C:", &l);
+            if (opt == -1) {
+                break;
+            }
+            switch (opt) {
+                case 'C' : /* chdir-to */
+                    chdir_to = l.arg;
+                    break;
+                case 'S' : /* FD with seccomp BPF program */
+                    if (!int_scan(l.arg, &seccomp_fd)) {
+                        strerr_dieinvalid(100, "seccomp BPF program file descriptor");
+                    }
+                    if (ndelay_off(seccomp_fd)) {
+                        strerr_diefu1sys(111, "make seccomp program fd blocking");
+                    }
+                    if (coe(seccomp_fd)) {
+                        strerr_diefu1sys(111, "make seccomp program fd close-on-exec");
+                    }
+                    break;
+                case 'U' : /* setuid from env */
+                    drop_privs = 1;
+                    /*
+                    The IAB 3-tuple of capability vectors (Inh, Amb and Bound),
+                    captured in type cap_iab_t combine to pass capabilities
+                    from one process to another through execve(2) system calls.
+                    */
+                    privs.new_iab = cap_iab_from_text(l.arg);
+                    if (privs.new_iab == NULL) {
+                        strerr_dieinvalid(100, "capability set");
+                    }
+                    if (!uid0_scan(getenv_dienotset("UID"), &privs.uid)) {
+                        strerr_dieinvalid(100, "UID");
+                    }
+                    if (!gid0_scan(getenv_dienotset("GID"), &privs.gid)) {
+                        strerr_dieinvalid(100, "GID");
+                    }
+                    char const *x = getenv_dienotset("GIDLIST");
+                    if (!gid_scanlist(privs.gids, NGROUPS_MAX + 1, x, &privs.gidn) && *x) {
+                        strerr_dieinvalid(100, "GIDLIST");
+                    }
+                    if (!stralloc_catb(&unex.sa, "UID\0GID\0GIDLIST", 16)) {
+                        strerr_diefu1sys(111, "store env names");
+                    }
+                    unex.count += 3; /* stays in step with the three names appended above */
+                    break ;
+                default :
+                    dieusage();
+            }
+        }
+        argc -= l.ind ;
+        argv += l.ind ;
+    }
+    if (argc < 3) {
+        dieusage();
+    }
+    pivot_and_umount(argv[0], argv[1], &unex); /* after the shift above argv[0] is the first operand */
+    if (drop_privs) {
+        drop(&privs);
+    } else if (seccomp_fd >= 0) {
+        if (prctl (PR_SET_NO_NEW_PRIVS, 1L, 0L, 0L, 0L)) {
+            strerr_diefu1sys(111, "enable NO_NEW_PRIVS for seccomp");
+        }
+    }
+    if (seccomp_fd >= 0) {
+        load_seccomp_program(seccomp_fd);
+    }
+    if (chdir_to) {
+        if (chdir(chdir_to) < 0) {
+            strerr_diefu2sys(111, "chdir to ", chdir_to);
+        }
+    }
+    xmexec_n(&argv[2], unex.sa.s, unex.sa.len, unex.count);
+}
diff --git a/src/lns-mounts-to-env b/src/lns-mounts-to-env
@@ -0,0 +1,44 @@
+#!/bin/sh
+prog=$(awk '
+function el_quote(s) {
+    gsub(/\\/, "\\\\", s); # first double all backslashes
+    gsub(/"/, "\\\"", s); # then escape quote marks
+    return "\"" s "\"" # then surround with quote marks
+}
+
+BEGIN {
+    # mount IDs seem to be unsigned, so use -1 to signify not found
+    max_id = -1
+    root_id = -1
+    count = 0
+}
+
+# read in /proc/self/mountinfo
+$5 == "/" { root_id = $1 }
+{
+    max_id = max_id < $1 ? $1 : max_id
+    parents[$1] = $2
+    mountpoints[$1] = $5
+}
+# print one NS_MTP_<n> assignment and advance the running count
+function print_umount(mtp){
+    print "NS_MTP_" (++count) "=" el_quote(mtp)
+}
+# children are printed before the mount they sit on (id is a local variable)
+function recursively_umount(mount_id, id) {
+    for(id=max_id; id>=0; id--){
+        if(parents[id] == mount_id){
+            recursively_umount(id)
+        }
+    }
+    print_umount(mountpoints[mount_id])
+}
+
+END{
+    if(root_id == -1) { exit 111 }
+    print "env"
+    recursively_umount(root_id)
+    print "NS_MTP_COUNT=" count
+}
+' /proc/self/mountinfo) || exit $?
+exec execlineb -s0 -c "$prog \$@" "$@"
diff --git a/src/lns-pidns_main.c b/src/lns-pidns_main.c
@@ -0,0 +1,110 @@
+#include <fcntl.h>
+#include <errno.h>
+#include <sched.h> /* Definition of CLONE_* constants & unshare */
+#include <unistd.h> /* fork(), getpid() */
+#include <sys/wait.h>
+#include <sys/select.h>
+#include <assert.h>
+#include <stdlib.h> /* exit() */
+
+#include <skalibs/exec.h>
+#include <skalibs/djbunix.h>
+#include <skalibs/strerr.h>
+
+#define PROG "lns-pidns"
+/* Add FD_CLOEXEC to the descriptor flags of fd; a failing fcntl() is fatal (exit code 111). */
+void fd_cloexec(int fd) {
+    int flags = fcntl(fd, F_GETFD);
+    if(flags == -1) {
+        strerr_dief1sys(111, "fcntl() getfd");
+    }
+    if(fcntl(fd, F_SETFD, flags | FD_CLOEXEC) < 0) {
+        strerr_dief1sys(111, "fcntl() setfd");
+    }
+}
+/* Add O_NONBLOCK to the file status flags of fd; a failing fcntl() is fatal (exit code 111). */
+void fd_nonblock(int fd) {
+    int flags = fcntl(fd, F_GETFL);
+    if(flags == -1) {
+        strerr_dief1sys(111, "fcntl() getfl");
+    }
+    if(fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0) {
+        strerr_dief1sys(111, "fcntl() setfl");
+    }
+}
+/* Make fd both close-on-exec and non-blocking. */
+void nonblock_cloexec(int fd) {
+    fd_cloexec(fd);
+    fd_nonblock(fd);
+}
+/* Run prog under a new PID namespace init that exits with prog or once the pipe from this process reads ready. */
+int main(const int argc, const char **argv) {
+    int piperw[2];
+    #define parent_rfd piperw[0]
+    #define parent_wfd piperw[1]
+    /* pipe2() is not used: its close-on-exec flag is O_CLOEXEC, and passing
+       FD_CLOEXEC instead is what made it fail with EINVAL. */
+    if(argc < 2) {
+        strerr_dieusage(100, "lns-pidns prog...");
+    }
+    if(pipe(piperw) != 0) {
+        strerr_dief1sys(111, "pipe()");
+    }
+    nonblock_cloexec(parent_rfd);
+    nonblock_cloexec(parent_wfd);
+    if(unshare(CLONE_NEWPID) != 0) {
+        strerr_dief1sys(111, "unshare()");
+    }
+    pid_t fork1_pid = fork();
+    if(fork1_pid < 0) {
+        strerr_dief1sys(111, "first fork()");
+    }
+    if(fork1_pid == 0) {
+        /* child */
+        assert(getpid() == 1);
+        if(close(parent_wfd) != 0) {
+            strerr_dief1sys(111, "close(parent_wfd)");
+        }
+        pid_t fork2_pid = fork();
+        if(fork2_pid < 0) {
+            strerr_dief1sys(111, "second fork()");
+        }
+        if(fork2_pid == 0) {
+            /* child */
+            xexec(&argv[1]); /* x variant as in lns-applyuidgid: must not fall through and exit 0 */
+        } else {
+            /* parent */
+            fd_set rfds;
+            struct timeval tv = {1, 0};
+            int retval, wstatus;
+            pid_t pid;
+            while(1) {
+                pid = waitpid(0, &wstatus, WNOHANG);
+                if(pid == fork2_pid) {
+                    exit(wait_estatus(wstatus));
+                }
+                FD_ZERO(&rfds); /* select() overwrites the set, so arm it again every round */
+                FD_SET(parent_rfd, &rfds);
+                retval = select(parent_rfd + 1, &rfds, NULL, NULL, &tv);
+                if (retval == -1 && errno != EINTR) {
+                    strerr_dief1sys(111, "select()");
+                }
+                if(retval > 0) {
+                    const char term_msg[] = "pidns_run: parent died, terminating\n";
+                    write(2, term_msg, sizeof(term_msg) - 1); /* without the trailing NUL */
+                    exit(111);
+                }
+                tv.tv_sec = 1;
+                tv.tv_usec = 0;
+            }
+        }
+    } else {
+        /* parent: keeps the write end open and mirrors the exit status of the namespace init */
+        if(close(parent_rfd) != 0) {
+            strerr_dief1sys(111, "close(parent_rfd)");
+        }
+        int wstatus;
+        if(waitpid(fork1_pid, &wstatus, 0) < 0) { strerr_dief1sys(111, "waitpid()"); }
+        exit(wait_estatus(wstatus));
+    }
+}