lns-lockdown_main.c (9665B)
1 #include <unistd.h> 2 #include <grp.h> 3 #include <limits.h> 4 #include <stdlib.h> 5 #include <errno.h> 6 #include <sys/prctl.h> 7 #include <sys/capability.h> 8 #include <sys/mman.h> /* for mmap() */ 9 #include <sys/mount.h> 10 #include <sys/syscall.h> /* for SYS_pivot_root */ 11 #include <linux/filter.h> /* for seccomp */ 12 #include <linux/seccomp.h> /* for seccomp */ 13 #include <linux/securebits.h> /* for prctl(PR_SET_SECUREBITS, ...) */ 14 15 #include <skalibs/types.h> 16 #include <skalibs/setgroups.h> 17 #include <skalibs/strerr.h> 18 #include <skalibs/sgetopt.h> 19 #include <skalibs/stralloc.h> 20 #include <skalibs/djbunix.h> 21 #include <skalibs/exec.h> 22 23 #define USAGE "lns-lockdown [ -U iab_caps ] [ -S seccomp_bpf_fd ] [ -C chdir ] old-place-for-new-root new-place-for-old-root prog..." 24 #define dieusage() strerr_dieusage(100, USAGE) 25 26 /*** data structures ***/ 27 struct drop_privs_s { 28 uid_t uid; 29 gid_t gid; 30 size_t gidn; 31 gid_t gids[NGROUPS_MAX + 1]; 32 cap_iab_t new_iab; 33 }; 34 35 struct unexport_s { 36 size_t count; 37 stralloc sa; 38 }; 39 40 /*** syscall wrappers ***/ 41 int syscall_pivot_root(char const *new_root, char const *put_old) { 42 return syscall(SYS_pivot_root, *new_root, *put_old); 43 } 44 int syscall_seccomp_set_filter(unsigned int flags, void *args) { 45 return syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, flags, args); 46 } 47 48 /*** utility functions ***/ 49 char const *const getenv_dienotset(char const *const name) { 50 char const *x = getenv(name); 51 if (!x) { 52 strerr_dienotset(100, name); 53 } 54 return x; 55 } 56 57 int stralloc_catenv0(stralloc *sa, char const * env_name) { 58 char const *const value = getenv_dienotset(env_name); 59 return stralloc_catb(sa, value, strlen(value) + 1); 60 } 61 62 /*** individual steps of dropping into sandbox ***/ 63 void pivot_and_umount( 64 char const *const old_place_for_new_root, 65 char const *const new_place_for_old_root, 66 struct unexport_s *const unex 67 ) { 68 size_t mtp_count, mtp_n; 69 int oldroot_len = strlen(new_place_for_old_root); 70 stralloc sa_env = STRALLOC_ZERO; 71 stralloc sa_mtp = STRALLOC_ZERO; 72 if (!stralloc_catb(&sa_mtp, new_place_for_old_root, oldroot_len)) { 73 strerr_diefu1sys(111, "store mount path"); 74 } 75 if (!size_scan(getenv_dienotset("NS_MTP_COUNT"), &mtp_count)) { 76 strerr_dieinvalid(100, "NS_MTP_COUNT"); 77 } 78 if (!stralloc_catb(&unex->sa, "NS_MTP_COUNT", 13)) { 79 strerr_diefu1sys(111, "store env name"); 80 } 81 unex->count++; 82 83 /* pivot_root */ 84 if (chdir(old_place_for_new_root) < 0) { 85 strerr_diefu2sys(111, "chdir to ", old_place_for_new_root) ; 86 } 87 if (syscall_pivot_root(".", new_place_for_old_root) < 0) { 88 strerr_diefu1sys(111, "pivot_root") ; 89 } 90 if (chroot(".") < 0) { 91 strerr_diefu1sys(111, "chroot") ; 92 } 93 94 /* umount oldroot */ 95 char env_name[SIZE_FMT + 6] = "NS_MTP="; 96 size_t env_name_len; 97 for (mtp_n = 1; mtp_n <= mtp_count; mtp_n++) { 98 env_name_len = 6 + size_fmt(&env_name[6], mtp_n); 99 env_name[env_name_len] = 0; 100 sa_mtp.len = oldroot_len; 101 if (!stralloc_catb(&unex->sa, env_name, env_name_len)) { 102 strerr_diefu1sys(111, "store env name"); 103 } 104 unex->count++; 105 106 sa_mtp.len = oldroot_len; 107 if (!stralloc_catenv0(&sa_mtp, getenv_dienotset(env_name))) { 108 strerr_diefu1sys(111, "store mount path"); 109 } 110 if (sa_mtp.s[oldroot_len] != '/') { 111 strerr_dieinvalid(100, env_name); 112 } 113 switch (umount(sa_mtp.s)) { 114 case 0: 115 break; 116 case EBUSY: 117 if (umount2(sa_mtp.s, MNT_DETACH)) { 118 strerr_diefu2sys(111, "to detach mountpoint", sa_mtp.s) ; 119 } 120 break; 121 default: 122 strerr_diefu2sys(111, "to umount", sa_mtp.s) ; 123 break; 124 } 125 } 126 } 127 128 void drop(struct drop_privs_s const *const privs) { 129 /* make sure privileges are dropped permanently */ 130 if (prctl(PR_SET_SECUREBITS, 131 SECBIT_KEEP_CAPS | /* technically unneeded as NO_SETUID_FIXUP is superset */ 132 SECBIT_NO_SETUID_FIXUP | 133 SECBIT_NOROOT | /* disables suid and filecap privilege gain */ 134 SECBIT_NOROOT_LOCKED) < 0) { 135 strerr_dief1sys(111, "Failed to set securebits via prctl()"); 136 } 137 138 /* set these capabilities for the current process */ 139 if (cap_iab_set_proc(privs->new_iab) != 0) { 140 strerr_dief1sys(111, "Failed to set capabilities via cap_set_proc()"); 141 } 142 143 /* ancillary groups */ 144 if (privs->gidn != (size_t) -1 145 && setgroups_and_gid(privs->gid ? privs->gid : getegid(), privs->gidn, 146 privs->gids) < 0) { 147 strerr_diefu1sys(111, "set supplementary group list"); 148 } 149 150 /* primary group */ 151 if (privs->gid && setgid(privs->gid) < 0) { 152 strerr_diefu1sys(111, "setgid"); 153 } 154 155 /* set userid */ 156 if (privs->uid && setuid(privs->uid) < 0) { 157 strerr_diefu1sys(111, "setuid"); 158 } 159 } 160 161 void load_seccomp_program(int fd) { 162 strerr_dief(99, "seccomp program loading not yet implemented"); 163 struct sock_fprog seccomp_filter; 164 struct stat st; 165 if (fstat (fd, &st) < 0) { 166 strerr_diefu1sys(111, "fstat() the seccomp program fd"); 167 } 168 if (st.st_size > 0) { 169 void * addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); 170 if (addr == MAP_FAILED) { 171 strerr_diefu1sys(111, "mmap() the seccomp program fd"); 172 } 173 seccomp_filter.len = st.st_size / 8; 174 seccomp_filter.filter = (struct sock_filter *) addr; 175 } else { 176 stralloc sa = STRALLOC_ZERO; 177 char read_buf[4096]; 178 ssize_t read_bytes; 179 do { 180 read_bytes = read (fd, read_buf, sizeof(read_buf)); 181 if (read_bytes < 0 && errno != EINTR) { 182 strerr_diefu1sys(111, "read() the seccomp program fd"); 183 } else if (read_bytes > 0) { 184 if (!stralloc_catb(&sa, read_buf, read_bytes)) { 185 strerr_diefu1sys(111, "store seccomp program"); 186 } 187 } 188 } while (read_bytes != 0); 189 seccomp_filter.len = sa.len / 8; 190 seccomp_filter.filter = (struct sock_filter *) sa.s; 191 } 192 if (syscall_seccomp_set_filter(0, &seccomp_filter)) { 193 strerr_diefu1sys(111, "apply the seccomp BPF program"); 194 } 195 } 196 197 int main (int argc, char const *const *argv) { 198 struct unexport_s unex = { 0, STRALLOC_ZERO }; 199 int drop_privs = 0; 200 struct drop_privs_s privs = { 201 .uid = 0, 202 .gid = 0, 203 .gidn = (size_t) -1, 204 }; 205 char const * chdir_to = 0; 206 int seccomp_fd = -1; 207 208 PROG = "ns_pivot_into"; 209 { 210 subgetopt l = SUBGETOPT_ZERO; 211 for (;;) { 212 int opt = subgetopt_r(argc, argv, "U:S:C:", &l); 213 if (opt == -1) { 214 break; 215 } 216 switch (opt) { 217 case 'C' : /* chdir-to */ 218 chdir_to = l.arg; 219 break; 220 case 'S' : /* FD with seccomp BPF program */ 221 if (!int_scan(l.arg, &seccomp_fd)) { 222 strerr_dieinvalid(100, "seccomp BPF program file descriptor"); 223 } 224 if (ndelay_off(seccomp_fd)) { 225 strerr_diefu1sys(111, "make seccomp program fd non-blocking"); 226 } 227 if (coe(seccomp_fd)) { 228 strerr_diefu1sys(111, "make seccomp program fd close-on-exec"); 229 } 230 break; 231 case 'U' : /* setuid from env */ 232 drop_privs = 1; 233 /* 234 The IAB 3-tuple of capability vectors (Inh, Amb and Bound), 235 captured in type cap_iab_t combine to pass capabilities 236 from one process to another through execve(2) system calls. 237 */ 238 privs.new_iab = cap_iab_from_text(l.arg); 239 if (privs.new_iab == NULL) { 240 strerr_dieinvalid(100, "capability set"); 241 } 242 if (!uid0_scan(getenv_dienotset("UID"), &privs.uid)) { 243 strerr_dieinvalid(100, "UID"); 244 } 245 if (!gid0_scan(getenv_dienotset("GID"), &privs.gid)) { 246 strerr_dieinvalid(100, "GID"); 247 } 248 char const *x = getenv_dienotset("GIDLIST"); 249 if (!gid_scanlist(privs.gids, NGROUPS_MAX + 1, x, &privs.gidn) && *x) { 250 strerr_dieinvalid(100, "GIDLIST"); 251 } 252 if (!stralloc_catb(&unex.sa, "UID\0GID\0GIDLIST", 16)) { 253 strerr_diefu1sys(111, "store env names"); 254 } 255 unex.count = 3; 256 break ; 257 default : 258 dieusage(); 259 } 260 } 261 argc -= l.ind ; 262 argv += l.ind ; 263 } 264 if (argc < 3) { 265 dieusage(); 266 } 267 pivot_and_umount(argv[1], argv[2], &unex); 268 if (drop_privs) { 269 drop(&privs); 270 } else if (seccomp_fd >= 0) { 271 if (prctl (PR_SET_NO_NEW_PRIVS, 1L, 0L, 0L, 0L)) { 272 strerr_diefu1sys(111, "enable NO_NEW_PRIVS for seccomp"); 273 } 274 } 275 if (seccomp_fd >= 0) { 276 load_seccomp_program(seccomp_fd); 277 } 278 if (chdir_to) { 279 if (chdir(chdir_to) < 0) { 280 strerr_diefu2sys(111, "chdir to ", chdir_to); 281 } 282 } 283 xmexec_n(&argv[3], unex.sa.s, unex.sa.len, unex.count); 284 }