userns_sandbox.py (11410B)
#!/usr/bin/python3
"""Helpers for running a command inside Linux namespaces and a chroot.

The module wraps ``mount(2)``/``umount(2)``/``unshare(2)`` through ctypes,
describes the mounts to set up inside the sandbox root, and provides
:func:`pidns_run` / :func:`sandbox_run` to enter the sandbox and exec a command.
"""
import argparse
import ctypes
import dataclasses
import enum
import errno
import fcntl
import os
import os.path
import pathlib
import select

# use_errno=True makes ctypes keep a private errno copy for ctypes.get_errno().
libc = ctypes.CDLL(None, use_errno=True)
CLONE_NEWNS = 0x00020000  # New mount namespace group
CLONE_NEWCGROUP = 0x02000000  # New cgroup namespace
CLONE_NEWUTS = 0x04000000  # New utsname namespace
CLONE_NEWIPC = 0x08000000  # New ipc namespace
CLONE_NEWUSER = 0x10000000  # New user namespace
CLONE_NEWPID = 0x20000000  # New pid namespace
CLONE_NEWNET = 0x40000000  # New network namespace
CLONE_NEWTIME = 0x00000080  # New time namespace

SYS_pivot_root = 155


class MountFlag(int, enum.Enum):
    """Mount flags.

    Members are plain ints as well, so they can be OR-ed together; the result
    of ``|`` is an ordinary ``int``.
    """

    #: Mount read-only.
    RDONLY = 1
    #: Ignore suid and sgid bits.
    NOSUID = 2
    #: Disallow access to device special files.
    NODEV = 4
    #: Disallow program execution.
    NOEXEC = 8
    #: Writes are synced at once.
    SYNCHRONOUS = 16
    #: Alter flags of a mounted FS.
    REMOUNT = 32
    #: Allow mandatory locks on an FS.
    MANDLOCK = 64
    #: Directory modifications are synchronous.
    DIRSYNC = 128
    #: Do not follow symlinks.
    NOSYMFOLLOW = 256
    #: Do not update access times.
    NOATIME = 1024
    #: Do not update directory access times.
    NODIRATIME = 2048
    #: Bind directory at different place.
    BIND = 4096
    #: Move an existing mount to a new location.
    MOVE = 8192
    #: Apply the operation recursively to submounts.
    REC = 16384
    SILENT = 32768
    #: VFS does not apply the umask.
    POSIXACL = 1 << 16
    #: Change to unbindable.
    UNBINDABLE = 1 << 17
    #: Change to private.
    PRIVATE = 1 << 18
    #: Change to slave.
    SLAVE = 1 << 19
    #: Change to shared.
    SHARED = 1 << 20
    #: Update atime relative to mtime/ctime.
    RELATIME = 1 << 21
    #: This is a kern_mount call.
    KERNMOUNT = 1 << 22
    #: Update inode I_version field.
    I_VERSION = 1 << 23
    #: Always perform atime updates.
    STRICTATIME = 1 << 24
    #: Update the on-disk [acm]times lazily.
    LAZYTIME = 1 << 25
    ACTIVE = 1 << 30
    NOUSER = 1 << 31


# int mount(const char *source, const char *target, const char *fstype,
#           unsigned long flags, const void *data)
_mount = libc.mount
_mount.restype = ctypes.c_int
_mount.argtypes = (
    ctypes.c_char_p,
    ctypes.c_char_p,
    ctypes.c_char_p,
    ctypes.c_ulong,
    ctypes.c_void_p,
)

# int umount(const char *target)
_umount = libc.umount
_umount.restype = ctypes.c_int
_umount.argtypes = (ctypes.c_char_p,)


def c_path(path):
    """Convert a path to the ``bytes`` form expected by ``c_char_p`` arguments.

    :param path: ``None``, ``str``, ``bytes`` or any ``os.PathLike`` object.
    :return: ``None`` if *path* is ``None``, otherwise the path encoded with
        the filesystem encoding.
    """
    if path is None:
        return None
    # os.fsencode() accepts str, bytes and PathLike alike and uses the
    # filesystem encoding (with surrogateescape) instead of a fixed codec.
    return os.fsencode(path)


def c_error():
    """Return an :class:`OSError` built from the errno of the last libc call."""
    code = ctypes.get_errno()
    return OSError(code, os.strerror(code))


def mount(
    source: str,
    target: str,
    fstype: str,
    flags: int = 0,
    data: str = None,
):
    """Mount filesystem.

    :param source: Device/source to mount (may be ``None``).
    :param target: Mountpoint.
    :param fstype: Filesystem type (may be ``None``). Available filesystem
        types can be found in /proc/filesystems.
    :param flags: Mount flags.
    :param data: Mount options for specified filesystem.
    :raises OSError: If mount call failed with nonzero return code.
    """
    rc = _mount(
        c_path(source),
        c_path(target),
        fstype.encode() if fstype is not None else None,
        int(flags),
        data.encode() if data is not None else None,
    )
    if rc != 0:
        raise c_error()


def bind_mount(
    source: str,
    target: str,
    write: bool = False,
):
    """Bind-mount *source* onto *target* with ``NOSUID`` and ``NODEV``.

    :param source: Existing path to bind.
    :param target: Mountpoint.
    :param write: When false, ``RDONLY`` is added to the mount flags.
    :raises OSError: If the mount call failed.
    """
    # NOTE(review): per mount(2), flags other than MS_REC are ignored on the
    # initial MS_BIND call; making a bind mount read-only needs a second
    # MS_REMOUNT|MS_BIND|MS_RDONLY call (which, inside a user namespace, must
    # also carry over the locked flags of the source mount).  So RDONLY /
    # NOSUID / NODEV below are probably not in effect - confirm and fix.
    flags = MountFlag.BIND | MountFlag.NOSUID | MountFlag.NODEV
    if not write:
        flags |= MountFlag.RDONLY
    return mount(source, target, None, flags)


def umount(target: str):
    """Unmount filesystem.

    :param target: Mountpoint.
    :raises OSError: If umount call failed with nonzero return code.
    """
    if _umount(c_path(target)) != 0:
        raise c_error()


def parse_mountinfo(mountinfo_path='/proc/self/mountinfo'):
    """Not implemented yet; always raises :class:`NotImplementedError`."""
    raise NotImplementedError()


def recursive_umount(mount_id, mountinfo):
    """Not implemented yet; always raises :class:`NotImplementedError`."""
    raise NotImplementedError()


def nonblock_cloexec(fd):
    """Make *fd* non-blocking and close-on-exec.

    :param fd: Open file descriptor.
    :return: Result of the final ``fcntl(F_SETFD)`` call.
    """
    # O_NONBLOCK is a file *status* flag and has to go through F_SETFL;
    # F_SETFD only understands descriptor flags such as FD_CLOEXEC.
    status_flags = fcntl.fcntl(fd, fcntl.F_GETFL)
    fcntl.fcntl(fd, fcntl.F_SETFL, status_flags | os.O_NONBLOCK)
    fd_flags = fcntl.fcntl(fd, fcntl.F_GETFD)
    return fcntl.fcntl(fd, fcntl.F_SETFD, fd_flags | fcntl.FD_CLOEXEC)


def exit_status(status):
    """Exit the current process mirroring a ``waitpid`` *status*.

    A child killed by signal N yields exit code ``128 + N``; exit codes of 128
    and above are clamped to 128; anything else is passed through.

    :param status: Raw status word as returned by :func:`os.waitpid`.
    :raises SystemExit: Always.
    """
    if os.WIFSIGNALED(status):
        # WTERMSIG strips the core-dump bit (0x80), keeping the code <= 255.
        raise SystemExit(128 + os.WTERMSIG(status))
    ret = os.WEXITSTATUS(status)
    if ret >= 128:
        raise SystemExit(128)
    raise SystemExit(ret)


def exec_command(argv):
    """Replace the current process with *argv*, searching ``PATH`` if needed.

    A program name containing a slash is executed as given; otherwise each
    ``PATH`` entry is tried in order.

    :param argv: Non-empty argument vector; ``argv[0]`` is the program.
    :raises SystemExit: With code 127 if the program was not found on ``PATH``.
    :raises OSError: If exec fails for a reason other than a missing file.
    """
    program = argv[0]
    if '/' in program:
        # Explicit path (absolute or relative): no PATH lookup.
        os.execv(program, argv)
    # Fall back to the platform default when PATH is unset.
    for d in os.environ.get('PATH', os.defpath).split(':'):
        try:
            os.execv(os.path.join(d, program), argv)
        except (FileNotFoundError, NotADirectoryError):
            continue
    raise SystemExit(127)


def map_uid_gid(orig_uid, orig_gid):
    """Map *orig_uid*/*orig_gid* to themselves in the current user namespace.

    Writes single-entry identity maps to ``/proc/self/uid_map`` and
    ``/proc/self/gid_map`` (after writing ``deny`` to ``/proc/self/setgroups``)
    and then calls ``setuid``/``setgid`` with the same ids.
    """
    with open('/proc/self/uid_map', 'wt') as f:
        f.write(f'{orig_uid} {orig_uid} 1\n')

    # setgroups is written before gid_map, see user_namespaces(7).
    with open('/proc/self/setgroups', 'wt') as f:
        f.write('deny\n')

    with open('/proc/self/gid_map', 'wt') as f:
        f.write(f'{orig_gid} {orig_gid} 1\n')

    os.setuid(orig_uid)
    os.setgid(orig_gid)


def _wait_as_init(child_pid, parent_rfd):
    """Reap children as PID 1 until *child_pid* exits, then mirror its status."""
    rlist = (parent_rfd,)
    while True:
        # -1 rather than 0: orphans reparented to us (and a child that called
        # setsid()/setpgid()) may live in a different process group.
        (pid, status) = os.waitpid(-1, os.WNOHANG)
        if pid == child_pid:
            exit_status(status)
        if pid != 0:
            # Reaped some other process; check for more before sleeping.
            continue
        try:
            readable, _, _ = select.select(rlist, (), (), 1.0)
        except OSError as e:
            # We might get interrupted by SIGCHLD here
            if e.errno != errno.EINTR:
                raise
            continue
        if readable:
            try:
                data = os.read(parent_rfd, 1)
            except BlockingIOError:
                data = None
            if data == b'':
                # EOF: every write end is closed.  Stop watching the pipe,
                # otherwise select() returns immediately forever (busy loop).
                rlist = ()


def pidns_run(unshare_flags, continuation, *args, **kwargs):
    """Run *continuation* in new namespaces, below a dedicated PID-1 process.

    The caller unshares ``CLONE_NEWPID | unshare_flags`` (mapping its uid/gid
    when ``CLONE_NEWUSER`` is requested) and forks.  The first child becomes
    PID 1 of the new pid namespace, forks again and reaps children until the
    second child is gone.  The second child calls
    ``continuation(*args, **kwargs)``.

    :param unshare_flags: ``CLONE_*`` flags passed to ``unshare(2)``.
    :param continuation: Callable executed in the innermost child.
    :raises OSError: If ``unshare`` fails.
    :raises SystemExit: In the outer process and in PID 1, with a code derived
        from the awaited child's wait status.
    """
    (parent_rfd, parent_wfd) = os.pipe()
    nonblock_cloexec(parent_rfd)
    nonblock_cloexec(parent_wfd)
    # Remember the ids before unshare(); they are needed for the id maps.
    orig_uid = os.getuid()
    orig_gid = os.getgid()
    if libc.unshare(CLONE_NEWPID | unshare_flags) != 0:
        raise c_error()
    if unshare_flags & CLONE_NEWUSER:
        map_uid_gid(orig_uid, orig_gid)
    fork_pid = os.fork()
    if fork_pid == 0:
        # child: first process in the new pid namespace
        assert os.getpid() == 1
        os.close(parent_wfd)
        fork2_pid = os.fork()
        if fork2_pid == 0:
            # child
            continuation(*args, **kwargs)
        else:
            # parent
            _wait_as_init(fork2_pid, parent_rfd)
    else:
        # parent
        os.close(parent_rfd)
        (pid, status) = os.waitpid(fork_pid, 0)
        exit_status(status)


@dataclasses.dataclass(frozen=True)
class MountTMPFS:
    """A tmpfs to mount at *path* (relative to the sandbox root)."""

    path: pathlib.PosixPath

    def __post_init__(self):
        assert isinstance(self.path, pathlib.PosixPath)
        assert not self.path.is_absolute()

    def mount(self, root):
        """Create ``root / path`` if needed and mount a tmpfs there."""
        dst = root / self.path
        dst.mkdir(parents=True, exist_ok=True)
        mount('tmpfs', dst, 'tmpfs', MountFlag.NOSUID | MountFlag.NODEV)


@dataclasses.dataclass(frozen=True)
class MountBind:
    """A bind mount of absolute *src* at *dst* (relative to the sandbox root)."""

    src: pathlib.PosixPath
    dst: pathlib.PosixPath
    write: bool = False

    def __post_init__(self):
        assert isinstance(self.src, pathlib.PosixPath)
        assert self.src.is_absolute()
        assert isinstance(self.dst, pathlib.PosixPath)
        assert not self.dst.is_absolute()

    def mount(self, root):
        """Bind ``src`` onto ``root / dst``.

        The mountpoint is only created when ``src`` is a directory.
        """
        dst = root / self.dst
        if self.src.is_dir():
            dst.mkdir(parents=True, exist_ok=True)
        bind_mount(self.src, dst, self.write)


def relpath(s):
    """Return *s* as a ``PosixPath`` relative to ``/`` (relative input is kept)."""
    p = pathlib.PosixPath(s)
    return p.relative_to('/') if p.is_absolute() else p


def parse_mount(s):
    """Parse a mount specification string.

    Accepted forms are ``tmpfs:PATH``, ``ro_bind:SRC:DST`` and
    ``rw_bind:SRC:DST``.

    :return: :class:`MountTMPFS` or :class:`MountBind`.
    :raises ValueError: On an unknown mount type or a missing ``:`` separator.
    """
    m_type, rest = s.split(':', maxsplit=1)
    if m_type == 'tmpfs':
        return MountTMPFS(relpath(rest))
    if m_type in ('rw_bind', 'ro_bind'):
        src, dst = rest.split(':', maxsplit=1)
        return MountBind(pathlib.PosixPath(src), relpath(dst), m_type == 'rw_bind')
    raise ValueError(m_type)


@dataclasses.dataclass(frozen=True)
class Settings:
    """Validated sandbox configuration (checked with asserts on creation)."""

    # Absolute, existing directory; must contain a directory named `environment`.
    versions: pathlib.PosixPath
    # Absolute sandbox root; must contain oldroot/, proc/, dev/, bin/ and bin/sh.
    root: pathlib.PosixPath
    # Absolute working directory used after chroot.
    chdir: pathlib.PosixPath
    environment: str
    # Content of the vars YAML file, or an empty dict.
    vars: dict
    command: tuple
    extra_mount: tuple

    def __post_init__(self):
        assert isinstance(self.command, tuple)
        assert all(isinstance(arg, (str, bytes)) for arg in self.command)

        assert isinstance(self.extra_mount, tuple)
        assert all(isinstance(arg, (MountTMPFS, MountBind)) for arg in self.extra_mount)

        assert isinstance(self.chdir, pathlib.PosixPath)
        assert self.chdir.is_absolute()

        assert isinstance(self.versions, pathlib.PosixPath)
        assert self.versions.is_absolute()
        assert self.versions.is_dir()
        assert (self.versions / self.environment).is_dir()

        self._check_root()

    def _check_root(self):
        """Assert that ``root`` has the layout the sandbox relies on."""
        assert isinstance(self.root, pathlib.PosixPath)
        assert self.root.is_absolute()
        assert self.root.is_dir()
        assert (self.root / 'oldroot').is_dir()
        assert (self.root / 'proc').is_dir()
        assert (self.root / 'dev').is_dir()
        assert (self.root / 'bin').is_dir()
        assert (self.root / 'bin/sh').exists()

    @classmethod
    def from_args(cls, args):
        """Build settings from a parsed argparse namespace.

        ``args.versions`` wins over the ``versions`` key of the vars file.

        :raises KeyError: If neither ``args.versions`` nor a ``versions`` key
            in the vars file is available.
        """
        if args.vars:
            import yaml

            with args.vars.open('rt') as f:
                # An empty YAML document loads as None.
                v = yaml.safe_load(f) or {}
        else:
            v = {}

        return cls(
            versions=(args.versions or pathlib.PosixPath(v['versions'])),
            root=args.root_dir,
            chdir=args.chdir,
            environment=args.environment,
            vars=v,
            command=tuple(args.command),
            # argparse's 'append' action leaves None when the option is absent.
            extra_mount=tuple(args.extra_mount or ()),
        )


def sandbox_run(settings, command):
    """Populate the sandbox root with mounts, chroot into it and exec *command*.

    Mounts ``proc``, bind-mounts the host ``/dev`` unless the root already has
    a ``dev/null`` character device, then mounts a tmpfs on ``dev/shm``, the
    extra mounts and the versions directory (at the same path as outside).

    :param settings: :class:`Settings` instance.
    :param command: Argument vector handed to :func:`exec_command`.
    """
    mount('proc', settings.root / 'proc', 'proc', MountFlag.NOSUID | MountFlag.NODEV)
    if not (settings.root / 'dev/null').is_char_device():
        mount(
            '/dev',
            settings.root / 'dev',
            None,
            (MountFlag.BIND | MountFlag.NOSUID | MountFlag.REC),
        )

    mountpoints = [
        MountTMPFS(relpath('/dev/shm')),
    ]
    mountpoints.extend(settings.extra_mount)
    mountpoints.append(MountBind(settings.versions, settings.versions.relative_to('/')))
    for m in mountpoints:
        m.mount(settings.root)

    os.chroot(str(settings.root))
    os.chdir(settings.chdir)
    exec_command(command)

def main(args):
    """Run the requested command inside the sandbox.

    Unshares user, mount, network, IPC and pid namespaces and hands control to
    ``sandbox_run`` with settings built from *args*.

    :param args: Namespace produced by ``argument_parser``.
    """
    pidns_run(
        CLONE_NEWUSER | CLONE_NEWNS | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWPID,
        sandbox_run,
        Settings.from_args(args),
        args.command,
    )


argument_parser = argparse.ArgumentParser(
    description="User namespaces based sandbox for pthbs",
    allow_abbrev=False,
)
argument_parser.add_argument(
    '--vars',
    '-y',
    type=pathlib.PosixPath,
    help="vars.yaml to read configuration from",
)
argument_parser.add_argument(
    '--versions',
    '-V',
    type=pathlib.PosixPath,
    help="versions dir (e.g. /versions)",
)
argument_parser.add_argument(
    '--chdir',
    '-C',
    type=pathlib.PosixPath,
    default=pathlib.PosixPath(os.getcwd()),
    help="set working directory inside sandbox",
)
argument_parser.add_argument(
    '--extra-mount',
    action='append',
    type=parse_mount,
    # Without a default the attribute is None when the option is never given,
    # which breaks tuple(args.extra_mount) in Settings.from_args.
    default=[],
    help="additional mount: tmpfs:PATH, ro_bind:SRC:DST or rw_bind:SRC:DST"
    " (may be repeated)",
)
argument_parser.add_argument('root_dir', type=pathlib.PosixPath)
argument_parser.add_argument('environment')
argument_parser.add_argument('command', nargs='+')


if __name__ == '__main__':
    args = argument_parser.parse_args()
    main(args)

# pylama:linters=pycodestyle,pyflakes:ignore=D212,D203,D100,D101,D102,D107
# vim: sts=4 ts=4 sw=4 et tw=88 efm=%A%f\:%l\:%c\ %t%n\ %m