ns_sandbox.py (17463B)
#!/usr/bin/python3
"""Linux namespaces based sandbox for pthbs.

Two modes are supported (see ``--mode``):

* ``userns`` -- unshare user/mount/net/ipc/pid namespaces, mount the required
  filesystems below the sandbox root, ``chroot`` into it and exec the command.
* ``root`` -- unshare net/ipc/pid and mount namespaces, optionally populate a
  tmpfs root from a tarball, ``pivot_root`` into it, drop to the uid/gid taken
  from ``pthbs_uid``/``pthbs_gid`` and exec the command.
"""
import argparse
import ctypes
import dataclasses
import enum
import errno
import fcntl
import os
import os.path
import pathlib
import re
import select
import stat
import subprocess

libc = ctypes.CDLL(None, use_errno=True)
CLONE_NEWNS = 0x00020000  # New mount namespace group
CLONE_NEWCGROUP = 0x02000000  # New cgroup namespace
CLONE_NEWUTS = 0x04000000  # New utsname namespace
CLONE_NEWIPC = 0x08000000  # New ipc namespace
CLONE_NEWUSER = 0x10000000  # New user namespace
CLONE_NEWPID = 0x20000000  # New pid namespace
CLONE_NEWNET = 0x40000000  # New network namespace
CLONE_NEWTIME = 0x00000080  # New time namespace

# NOTE(review): 155 looks like the x86-64 syscall number for pivot_root; other
# architectures use different numbers -- confirm before running elsewhere.
SYS_pivot_root = 155

MNT_FORCE = 1
MNT_DETACH = 2
MNT_EXPIRE = 4
UMOUNT_NOFOLLOW = 8


class MountFlag(int, enum.Enum):
    """Mount flags."""

    #: Mount read-only.
    RDONLY = 1
    #: Ignore suid and sgid bits.
    NOSUID = 2
    #: Disallow access to device special files.
    NODEV = 4
    #: Disallow program execution.
    NOEXEC = 8
    #: Writes are synced at once.
    SYNCHRONOUS = 16
    #: Alter flags of a mounted FS.
    REMOUNT = 32
    #: Allow mandatory locks on an FS.
    MANDLOCK = 64
    #: Directory modifications are synchronous.
    DIRSYNC = 128
    #: Do not follow symlinks.
    NOSYMFOLLOW = 256
    #: Do not update access times.
    NOATIME = 1024
    #: Do not update directory access times.
    NODIRATIME = 2048
    #: Bind directory at different place.
    BIND = 4096
    MOVE = 8192
    REC = 16384
    SILENT = 32768
    #: VFS does not apply the umask.
    POSIXACL = 1 << 16
    #: Change to unbindable.
    UNBINDABLE = 1 << 17
    #: Change to private.
    PRIVATE = 1 << 18
    #: Change to slave.
    SLAVE = 1 << 19
    #: Change to shared.
    SHARED = 1 << 20
    #: Update atime relative to mtime/ctime.
    RELATIME = 1 << 21
    #: This is a kern_mount call.
    KERNMOUNT = 1 << 22
    #: Update inode I_version field.
    I_VERSION = 1 << 23
    #: Always perform atime updates.
    STRICTATIME = 1 << 24
    #: Update the on-disk [acm]times lazily.
    LAZYTIME = 1 << 25
    ACTIVE = 1 << 30
    NOUSER = 1 << 31


# Typed prototypes for the libc entry points used below.
_mount = libc.mount
_mount.restype = ctypes.c_int
_mount.argtypes = (
    ctypes.c_char_p,
    ctypes.c_char_p,
    ctypes.c_char_p,
    ctypes.c_ulong,
    ctypes.c_void_p,
)

_umount = libc.umount
_umount.restype = ctypes.c_int
_umount.argtypes = (ctypes.c_char_p,)

_umount2 = libc.umount2
_umount2.restype = ctypes.c_int
_umount2.argtypes = (ctypes.c_char_p, ctypes.c_int)

_unshare = libc.unshare
_unshare.restype = ctypes.c_int
_unshare.argtypes = (ctypes.c_int,)

# Matches the three-digit octal escapes used in /proc/<pid>/mountinfo fields.
_MOUNTINFO_ESCAPE = re.compile(r'\\([0-7]{3})')


def c_path(path):
    """Convert a path to the ``bytes`` form expected by the libc wrappers.

    :param path: ``None``, ``str``, ``bytes`` or any ``os.PathLike`` object.
    :returns: ``None`` for ``None``, otherwise the filesystem-encoded bytes;
        values of any other type are returned unchanged.
    """
    if path is None:
        return None
    if isinstance(path, (str, os.PathLike)):
        # os.fsencode also copes with undecodable (surrogate-escaped) names.
        return os.fsencode(path)
    return path


def c_error():
    """Build an ``OSError`` from the errno left by the last ctypes call."""
    code = ctypes.get_errno()
    return OSError(code, os.strerror(code))


def unshare(flags):
    """Call unshare(2) with the given ``CLONE_NEW*`` flags.

    :raises OSError: If the call failed.
    """
    if _unshare(flags) != 0:
        raise c_error()


def pivot_root(new_root, put_old):
    """Call the pivot_root syscall.

    :param new_root: Path passed as the new root.
    :param put_old: Path passed as the place for the old root.
    :raises OSError: If the syscall failed.
    """
    if libc.syscall(SYS_pivot_root, c_path(new_root), c_path(put_old)) != 0:
        raise c_error()


def mount(
    source: str,
    target: str,
    fstype: str,
    flags: int = 0,
    data: str = None,
):
    """Mount filesystem.

    ``source`` and ``target`` may also be path-like objects; ``fstype`` and
    ``data`` may be ``None`` (passed to libc as NULL).

    :param source: Device/source to mount.
    :param target: Mountpoint.
    :param fstype: Filesystem type. Available filesystem types can be found in
        /proc/filesystems.
    :param flags: Mount flags.
    :param data: Mount options for specified filesystem.
    :raises OSError: If mount call failed with nonzero return code.
    """
    if (
        _mount(
            c_path(source),
            c_path(target),
            fstype.encode() if fstype is not None else fstype,
            int(flags),
            data.encode() if data is not None else data,
        )
        != 0
    ):
        raise c_error()


def bind_mount(
    source: str,
    target: str,
    write: bool = False,
):
    """Bind-mount ``source`` at ``target`` with NOSUID and NODEV.

    :param source: Existing path to bind.
    :param target: Mountpoint.
    :param write: When false, RDONLY is added to the mount flags.
    :raises OSError: If mount call failed with nonzero return code.
    """
    # NOTE(review): mount(2) documents that flags other than MS_REC may be
    # ignored on the initial MS_BIND call, in which case a read-only bind
    # needs a second MS_REMOUNT | MS_BIND call -- confirm on target kernels.
    return mount(
        source,
        target,
        None,
        (
            MountFlag.BIND
            | (0 if write else MountFlag.RDONLY)
            | MountFlag.NOSUID
            | MountFlag.NODEV
        ),
    )


def umount(target: str):
    """Unmount filesystem.

    :param target: Mountpoint.
    :raises OSError: If umount call failed with nonzero return code.
    """
    if _umount(c_path(target)) != 0:
        raise c_error()


def lazy_umount(target):
    """Unmount ``target``, falling back to a detached (lazy) unmount.

    :param target: Mountpoint.
    :raises OSError: If both the plain and the ``MNT_DETACH`` unmount failed.
    """
    target = c_path(target)
    if _umount(target) != 0:
        if _umount2(target, MNT_DETACH) != 0:
            raise c_error()


def _unescape_mountinfo(field):
    """Decode the octal escapes (e.g. ``\\040`` for space) in a mountinfo field."""
    return _MOUNTINFO_ESCAPE.sub(lambda m: chr(int(m.group(1), 8)), field)


@dataclasses.dataclass(frozen=True)
class MountInfo:
    """The leading fields of one /proc/<pid>/mountinfo record."""

    # Mount ID and the ID of the parent mount.
    id: int
    parent: int
    # (major, minor) device number pair.
    dev: tuple
    # Root of the mount within its filesystem and the mountpoint path.
    root: str
    mountpoint: str

    def __post_init__(self):
        assert isinstance(self.id, int)
        assert isinstance(self.parent, int)
        assert isinstance(self.dev, tuple)
        major, minor = self.dev
        assert isinstance(major, int)
        assert isinstance(minor, int)
        assert isinstance(self.root, str)
        assert self.root[0] == '/'
        assert isinstance(self.mountpoint, str)
        assert self.mountpoint[0] == '/'

    @classmethod
    def from_line(cls, line):
        """Parse one mountinfo line; fields after the mountpoint are ignored."""
        rec = line.split(maxsplit=5)
        major, minor = rec[2].split(':')
        return cls(
            id=int(rec[0]),
            parent=int(rec[1]),
            dev=(int(major), int(minor)),
            root=_unescape_mountinfo(rec[3]),
            mountpoint=_unescape_mountinfo(rec[4]),
        )


def parse_mountinfo(mountinfo_path='/proc/self/mountinfo'):
    """Read a mountinfo file.

    :param mountinfo_path: File to parse.
    :returns: ``(root_id, mountinfo)`` where ``root_id`` is the ID of the
        mount at ``/`` and ``mountinfo`` maps mount ID to :class:`MountInfo`.
    """
    root_id = None
    mountinfo = {}
    with open(mountinfo_path, 'rt') as f:
        for line in f:
            mi = MountInfo.from_line(line)
            if mi.mountpoint == '/':
                assert root_id is None
                root_id = mi.id
            assert mi.id not in mountinfo
            mountinfo[mi.id] = mi
    assert root_id is not None
    return (root_id, mountinfo)


def umount_order(mount_id, mountinfo):
    """Yield the mount tree below ``mount_id`` children first, itself last.

    :param mount_id: ID of the mount to start from.
    :param mountinfo: Mapping of mount ID to :class:`MountInfo`.
    """
    for mi in mountinfo.values():
        if mi.parent == mount_id:
            yield from umount_order(mi.id, mountinfo)
    yield mountinfo[mount_id]


def pivot_and_umount(new_root, put_old, umount_list):
    """Pivot into ``new_root`` and unmount the old mounts below ``put_old``.

    :param new_root: New root directory (``pathlib`` path).
    :param put_old: Directory below ``new_root`` receiving the old root.
    :param umount_list: Mountpoints (as seen before the pivot) to unmount, in
        the order they should be unmounted.
    """
    # After the pivot the old mountpoints are reachable below this prefix.
    mtp_prefix = '/' + put_old.relative_to(new_root).as_posix()
    pivot_root(new_root, put_old)
    os.chdir('/')  # so we don't stand in the old root
    for mtp in umount_list:
        lazy_umount(mtp_prefix + mtp)


def nonblock_cloexec(fd):
    """Make ``fd`` non-blocking and close-on-exec.

    :returns: Result of the final ``fcntl`` call.
    """
    # O_NONBLOCK is a file status flag (F_SETFL); FD_CLOEXEC is a descriptor
    # flag (F_SETFD). Passing O_NONBLOCK to F_SETFD does not set it.
    fcntl.fcntl(fd, fcntl.F_SETFL, fcntl.fcntl(fd, fcntl.F_GETFL) | os.O_NONBLOCK)
    return fcntl.fcntl(
        fd,
        fcntl.F_SETFD,
        fcntl.fcntl(fd, fcntl.F_GETFD) | fcntl.FD_CLOEXEC,
    )


def exit_status(status):
    """Exit with a code derived from a ``waitpid`` status word.

    :param status: Raw status as returned by ``os.waitpid``.
    :raises SystemExit: Always; ``128 + signal`` when the child was killed by
        a signal, otherwise the child's exit code.
    """
    # The low 7 bits hold the terminating signal; bit 0x80 is the core-dump
    # flag and must not be counted as part of the signal number.
    sig = status & 0x7F
    ret = status >> 8
    if sig:
        raise SystemExit(128 + sig)
    # NOTE(review): exit codes >= 128 are clamped to 128, which discards a
    # forwarded "128 + signal" value -- confirm this is intended.
    if ret >= 128:
        raise SystemExit(128)
    raise SystemExit(ret)


def exec_command(argv):
    """Replace the current process with ``argv``, searching ``PATH`` if needed.

    :param argv: Command and arguments; an ``argv[0]`` starting with ``/`` is
        executed directly.
    :raises SystemExit: With code 127 when the command is not found in PATH.
    """
    program = argv[0]
    if program[:1] == '/':
        os.execv(program, argv)
    for d in os.environ.get('PATH', os.defpath).split(':'):
        try:
            os.execv(os.path.join(d, program), argv)
        except (FileNotFoundError, NotADirectoryError):
            # Missing entry, or a PATH component that is not a directory.
            continue
    raise SystemExit(127)


def map_uid_gid(orig_uid, orig_gid):
    """Map ``orig_uid``/``orig_gid`` to themselves in the new user namespace."""
    with open('/proc/self/uid_map', 'wt') as f:
        f.write(f'{orig_uid} {orig_uid} 1\n')

    # 'deny' is written to setgroups before gid_map is written.
    with open('/proc/self/setgroups', 'wt') as f:
        f.write('deny\n')

    with open('/proc/self/gid_map', 'wt') as f:
        f.write(f'{orig_gid} {orig_gid} 1\n')

    os.setuid(orig_uid)
    os.setgid(orig_gid)


def pidns_run(unshare_flags, run_pid1=True):
    """Unshare namespaces and continue in a child that is PID 1 of a new pidns.

    The calling process stays behind, waits for the child and exits with its
    status; only the forked side returns from this function.

    :param unshare_flags: ``CLONE_NEW*`` flags; ``CLONE_NEWPID`` is always
        added. With ``CLONE_NEWUSER`` the original uid/gid are mapped.
    :param run_pid1: When true, hand over to :func:`pidns_pid1`; otherwise
        return the read end of the parent pipe from the PID 1 process.
    :raises SystemExit: In the original process, once the child terminated.
    """
    (parent_rfd, parent_wfd) = os.pipe()
    nonblock_cloexec(parent_rfd)
    nonblock_cloexec(parent_wfd)
    orig_uid = os.getuid()
    orig_gid = os.getgid()
    unshare(CLONE_NEWPID | unshare_flags)
    if unshare_flags & CLONE_NEWUSER:
        map_uid_gid(orig_uid, orig_gid)
    fork_pid = os.fork()
    if fork_pid == 0:
        # child
        assert os.getpid() == 1
        os.close(parent_wfd)
        if run_pid1:
            return pidns_pid1(parent_rfd)
        else:
            return parent_rfd
    else:
        # parent
        os.close(parent_rfd)
        (pid, status) = os.waitpid(fork_pid, 0)
        exit_status(status)


def pidns_pid1(parent_rfd):
    """Fork the payload process and act as a reaping init for it.

    The forked child returns ``None`` to the caller; the calling process
    never returns and exits with the payload's status.

    :param parent_rfd: Read end of the pipe shared with the outer parent.
    :raises SystemExit: In the init process, once the payload terminated.
    """
    fork2_pid = os.fork()
    if fork2_pid == 0:
        # child
        return

    # parent
    rlist = (parent_rfd,)
    while True:
        # Reap every exited child, whatever its process group (-1, not 0).
        while True:
            (pid, status) = os.waitpid(-1, os.WNOHANG)
            if pid == 0:
                break
            if pid == fork2_pid:
                exit_status(status)
        try:
            readable, _, _ = select.select(rlist, (), (), 1.0)
        except OSError as e:
            # We might get interrupted by SIGCHLD here
            if e.errno != errno.EINTR:
                raise
            continue
        if readable:
            # Nothing in this file writes to the pipe, so readability means
            # EOF; stop watching it to avoid spinning.
            # NOTE(review): EOF presumably means the outer parent is gone --
            # decide whether the sandbox should be torn down here.
            rlist = ()


@dataclasses.dataclass(frozen=True)
class MountTMPFS:
    """A tmpfs mount at ``path`` (relative to the sandbox root)."""

    path: pathlib.PosixPath

    def __post_init__(self):
        assert isinstance(self.path, pathlib.PosixPath)
        assert not self.path.is_absolute()

    def mount(self, root):
        """Create the mountpoint below ``root`` and mount a tmpfs on it."""
        dst = root / self.path
        dst.mkdir(parents=True, exist_ok=True)
        mount('tmpfs', dst, 'tmpfs', MountFlag.NOSUID | MountFlag.NODEV)


@dataclasses.dataclass(frozen=True)
class MountBind:
    """A bind mount of absolute ``src`` at ``dst`` (relative to the root)."""

    src: pathlib.PosixPath
    dst: pathlib.PosixPath
    write: bool = False

    def __post_init__(self):
        assert isinstance(self.src, pathlib.PosixPath)
        assert self.src.is_absolute()
        assert isinstance(self.dst, pathlib.PosixPath)
        assert not self.dst.is_absolute()

    def mount(self, root):
        """Bind ``src`` below ``root``; directory targets are created."""
        dst = root / self.dst
        if self.src.is_dir():
            dst.mkdir(parents=True, exist_ok=True)
        bind_mount(self.src, dst, self.write)


def relpath(s):
    """Return ``s`` as a ``PosixPath`` relative to ``/``."""
    p = pathlib.PosixPath(s)
    return p.relative_to('/') if p.is_absolute() else p


def parse_mount(s):
    """Parse an ``--extra-mount`` specification.

    Accepted forms are ``tmpfs:PATH``, ``rw_bind:SRC:DST`` and
    ``ro_bind:SRC:DST``.

    :raises ValueError: For an unknown mount type or a malformed value.
    """
    m_type, rest = s.split(':', maxsplit=1)
    if m_type == 'tmpfs':
        return MountTMPFS(relpath(rest))
    elif m_type in ('rw_bind', 'ro_bind'):
        write = m_type == 'rw_bind'
        src, dst = rest.split(':', maxsplit=1)
        return MountBind(pathlib.PosixPath(src), relpath(dst), write)
    raise ValueError(m_type)


@dataclasses.dataclass(frozen=True)
class Settings:
    """Validated sandbox configuration."""

    # Absolute path of the existing versions directory (bind-mounted inside).
    versions: pathlib.PosixPath
    # Absolute path of the existing sandbox root directory.
    root: pathlib.PosixPath
    # Working directory inside the sandbox (absolute).
    chdir: pathlib.PosixPath
    # Contents of the vars yaml file, or an empty dict.
    vars: dict
    # Command and arguments to exec inside the sandbox.
    command: tuple
    # Additional MountTMPFS / MountBind entries.
    extra_mount: tuple
    # (uid, gid) to switch to in root mode, or None.
    drop_to: tuple = None
    # Tarball unpacked into a fresh tmpfs root, or None to use root as is.
    untar: pathlib.PosixPath = None

    def __post_init__(self):
        assert isinstance(self.command, tuple)
        assert all(isinstance(arg, (str, bytes)) for arg in self.command)

        assert isinstance(self.extra_mount, tuple)
        assert all(isinstance(arg, (MountTMPFS, MountBind)) for arg in self.extra_mount)

        assert isinstance(self.chdir, pathlib.PosixPath)
        assert self.chdir.is_absolute()

        assert isinstance(self.versions, pathlib.PosixPath)
        assert self.versions.is_absolute()
        assert self.versions.is_dir()

        if self.drop_to is not None:
            assert isinstance(self.drop_to, tuple)
            uid, gid = self.drop_to
            assert isinstance(uid, int)
            assert isinstance(gid, int)

        assert isinstance(self.untar, (pathlib.PosixPath, type(None)))

        assert isinstance(self.root, pathlib.PosixPath)
        assert self.root.is_absolute()
        assert self.root.is_dir(), self.root
        # Without a tarball the root must already have the expected layout.
        if self.untar is None:
            self._check_root()

    def _check_root(self):
        """Assert that the root already contains the required entries."""
        assert (self.root / 'oldroot').is_dir()
        assert (self.root / 'proc').is_dir()
        assert (self.root / 'dev').is_dir()
        assert (self.root / 'bin').is_dir()
        assert (self.root / 'bin/sh').exists()

    @classmethod
    def from_args_and_env(cls, args, env):
        """Build settings from parsed arguments and an environment mapping.

        The versions directory is taken from ``--versions``, then the
        ``versions`` key of the vars yaml, then ``pthbs_versions`` in ``env``.

        :raises ValueError: If no versions directory is specified.
        :raises KeyError: In root mode when ``pthbs_uid``/``pthbs_gid`` are
            missing from ``env``.
        """
        if args.vars:
            import yaml

            with args.vars.open('rt') as f:
                v = yaml.safe_load(f)
        else:
            v = {}

        versions = args.versions
        if versions is None and 'versions' in v:
            versions = pathlib.PosixPath(v['versions'])
        if versions is None and 'pthbs_versions' in env:
            versions = pathlib.PosixPath(env['pthbs_versions'])
        if versions is None:
            raise ValueError(
                "Versions directory not specified as argument, yaml nor environment"
            )

        return cls(
            versions=versions,
            root=args.root_dir,
            chdir=args.chdir,
            vars=v,
            command=tuple(args.command),
            extra_mount=tuple(args.extra_mount) if args.extra_mount is not None else (),
            drop_to=(
                (int(env['pthbs_uid']), int(env['pthbs_gid']))
                if args.mode == 'root'
                else None
            ),
            untar=args.untar and pathlib.PosixPath(args.untar),
        )


def userns_sandbox_run(settings):
    """Set up mounts below the root, chroot into it and exec the command.

    Requires ``settings.untar`` and ``settings.drop_to`` to be ``None``.
    """
    assert settings.untar is None
    assert settings.drop_to is None
    mount('proc', settings.root / 'proc', 'proc', MountFlag.NOSUID | MountFlag.NODEV)
    # Only bind the host /dev when the root has no usable /dev/null.
    if not (settings.root / 'dev/null').is_char_device():
        mount(
            '/dev',
            settings.root / 'dev',
            None,
            (MountFlag.BIND | MountFlag.NOSUID | MountFlag.REC),
        )

    mountpoints = [
        MountTMPFS(relpath('/dev/shm')),
    ]
    mountpoints.extend(settings.extra_mount)
    mountpoints.append(MountBind(settings.versions, settings.versions.relative_to('/')))
    for m in mountpoints:
        m.mount(settings.root)

    os.chroot(str(settings.root))
    os.chdir(settings.chdir)
    exec_command(settings.command)


def mkchardev(path, major, minor, mode):
    """Create a character device node at ``path``."""
    if isinstance(path, pathlib.PosixPath):
        path = path.as_posix()
    os.mknod(
        path,
        mode=mode | stat.S_IFCHR,
        device=os.makedev(major, minor),
    )


def mkblockdev(path, major, minor, mode):
    """Create a block device node at ``path``."""
    if isinstance(path, pathlib.PosixPath):
        path = path.as_posix()
    os.mknod(
        path,
        mode=mode | stat.S_IFBLK,
        device=os.makedev(major, minor),
    )


def mknod_dev(dev):
    """Populate ``dev`` with basic device nodes and /proc/self/fd symlinks."""
    mkchardev(mode=0o666, major=1, minor=3, path=dev / "null")
    mkchardev(mode=0o666, major=1, minor=7, path=dev / "full")
    mkchardev(mode=0o666, major=5, minor=2, path=dev / "ptmx")
    mkchardev(mode=0o644, major=1, minor=8, path=dev / "random")
    mkchardev(mode=0o644, major=1, minor=9, path=dev / "urandom")
    mkchardev(mode=0o666, major=1, minor=5, path=dev / "zero")
    mkchardev(mode=0o666, major=5, minor=0, path=dev / "tty")
    (dev / "fd").symlink_to("/proc/self/fd")
    (dev / "stdin").symlink_to("/proc/self/fd/0")
    (dev / "stdout").symlink_to("/proc/self/fd/1")
    (dev / "stderr").symlink_to("/proc/self/fd/2")


def root_sandbox_setup(settings):
    """Prepare the root-mode sandbox and enter it.

    Optionally unpacks ``settings.untar`` into a fresh tmpfs root, mounts proc
    and the configured mountpoints, pivots into the root, unmounts the old
    tree, switches to ``settings.drop_to`` and changes to ``settings.chdir``.
    """
    uid, gid = settings.drop_to
    os.umask(0)
    # Record the mounts to drop before any sandbox mounts are added.
    to_umount = [mi.mountpoint for mi in umount_order(*parse_mountinfo())]
    r = settings.root
    if settings.untar:
        mount('sandbox_root', r, 'tmpfs', MountFlag.NOSUID)
        (r / 'oldroot').mkdir()
        subprocess.check_call(
            ('/bin/tar', 'xpf', settings.untar.absolute()),
            shell=False,
            cwd=r,
        )
    mount('proc', r / 'proc', 'proc', MountFlag.NOSUID | MountFlag.NODEV)
    if not (r / 'dev/null').is_char_device():
        mknod_dev(r / 'dev')

    mountpoints = [
        MountTMPFS(relpath('/dev/shm')),
    ]
    mountpoints.extend(settings.extra_mount)
    mountpoints.append(MountBind(settings.versions, settings.versions.relative_to('/')))
    for m in mountpoints:
        m.mount(r)

    if settings.untar:
        # The freshly populated tmpfs root is remounted read-only.
        mount(
            'tmpfs',
            r,
            '',
            (MountFlag.REMOUNT | MountFlag.RDONLY | MountFlag.NOSUID),
        )
    pivot_and_umount(r, r / 'oldroot', to_umount)
    # Group first: after setuid the process may no longer be able to setgid.
    os.setgid(gid)
    os.setuid(uid)
    os.chdir(settings.chdir)


def main(args, env):
    """Run the sandbox described by parsed ``args`` and environment ``env``."""
    settings = Settings.from_args_and_env(args, env)
    if args.mode == 'userns':
        pidns_run(
            CLONE_NEWUSER | CLONE_NEWNS | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWPID,
        )
        userns_sandbox_run(settings)
    else:
        pidns_run(
            CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWPID,
        )
        # NOTE(review): mount propagation is not made private after this
        # unshare; on hosts with shared propagation the mounts below may
        # propagate outside and pivot_root may fail -- confirm.
        unshare(CLONE_NEWNS)
        root_sandbox_setup(settings)
        os.umask(0o022)
        exec_command(settings.command)


argument_parser = argparse.ArgumentParser(
    description="Linux namespaces based sandbox for pthbs",
    allow_abbrev=False,
)
argument_parser.add_argument(
    '--mode',
    '-m',
    required=True,
    choices=('userns', 'root'),
    help="sandbox mode",
)
argument_parser.add_argument(
    '--vars',
    '-y',
    type=pathlib.PosixPath,
    help="vars.yaml to read configuration from",
)
argument_parser.add_argument(
    '--versions',
    '-V',
    type=pathlib.PosixPath,
    help="versions dir (e.g. /versions)",
)
argument_parser.add_argument(
    '--chdir',
    '-C',
    type=pathlib.PosixPath,
    default=pathlib.PosixPath('/'),
    help="set working directory inside sandbox",
)
# No default: Settings and userns_sandbox_run treat ``untar is None`` as
# "use the existing root directory as is".
argument_parser.add_argument(
    '--untar',
    '-f',
    type=pathlib.PosixPath,
    default=None,
    help="initial structure for build tmpfs",
)
argument_parser.add_argument('--extra-mount', action='append', type=parse_mount)
argument_parser.add_argument('root_dir', type=pathlib.PosixPath)
argument_parser.add_argument('command', nargs='+')


if __name__ == '__main__':
    args = argument_parser.parse_args()
    main(args, os.environ)

# pylama:linters=pycodestyle,pyflakes:ignore=D212,D203,D100,D101,D102,D107
# vim: sts=4 ts=4 sw=4 et tw=88 efm=%A%f\:%l\:%c\ %t%n\ %m