ns_sandbox.py (17118B)
#!/usr/bin/python3
import argparse
import ctypes
import dataclasses
import enum
import errno
import fcntl
import os
import os.path
import pathlib
import select
import stat
import subprocess

libc = ctypes.CDLL(None, use_errno=True)
CLONE_NEWNS = 0x00020000  # New mount namespace group
CLONE_NEWCGROUP = 0x02000000  # New cgroup namespace
CLONE_NEWUTS = 0x04000000  # New utsname namespace
CLONE_NEWIPC = 0x08000000  # New ipc namespace
CLONE_NEWUSER = 0x10000000  # New user namespace
CLONE_NEWPID = 0x20000000  # New pid namespace
CLONE_NEWNET = 0x40000000  # New network namespace
CLONE_NEWTIME = 0x00000080  # New time namespace

# NOTE(review): syscall numbers are architecture specific; 155 looks like the
# x86_64 value of pivot_root - confirm before running on another architecture.
SYS_pivot_root = 155

MNT_FORCE = 1
MNT_DETACH = 2
MNT_EXPIRE = 4
UMOUNT_NOFOLLOW = 8


class MountFlag(int, enum.Enum):
    """Mount flags."""

    #: Mount read-only.
    RDONLY = 1
    #: Ignore suid and sgid bits.
    NOSUID = 2
    #: Disallow access to device special files.
    NODEV = 4
    #: Disallow program execution.
    NOEXEC = 8
    #: Writes are synced at once.
    SYNCHRONOUS = 16
    #: Alter flags of a mounted FS.
    REMOUNT = 32
    #: Allow mandatory locks on an FS.
    MANDLOCK = 64
    #: Directory modifications are synchronous.
    DIRSYNC = 128
    #: Do not follow symlinks.
    NOSYMFOLLOW = 256
    #: Do not update access times.
    NOATIME = 1024
    #: Do not update directory access times.
    NODIRATIME = 2048
    #: Bind directory at different place.
    BIND = 4096
    #: Move an existing mount to a new location.
    MOVE = 8192
    #: Apply the operation recursively to the whole subtree.
    REC = 16384
    #: Suppress some kernel warning messages.
    SILENT = 32768
    #: VFS does not apply the umask.
    POSIXACL = 1 << 16
    #: Change to unbindable.
    UNBINDABLE = 1 << 17
    #: Change to private.
    PRIVATE = 1 << 18
    #: Change to slave.
    SLAVE = 1 << 19
    #: Change to shared.
    SHARED = 1 << 20
    #: Update atime relative to mtime/ctime.
    RELATIME = 1 << 21
    #: This is a kern_mount call.
    KERNMOUNT = 1 << 22
    #: Update inode I_version field.
    I_VERSION = 1 << 23
    #: Always perform atime updates.
    STRICTATIME = 1 << 24
    #: Update the on-disk [acm]times lazily.
    LAZYTIME = 1 << 25
    ACTIVE = 1 << 30
    NOUSER = 1 << 31


_mount = libc.mount
_mount.restype = ctypes.c_int
_mount.argtypes = (
    ctypes.c_char_p,
    ctypes.c_char_p,
    ctypes.c_char_p,
    ctypes.c_ulong,
    ctypes.c_void_p,
)

_umount = libc.umount
_umount.restype = ctypes.c_int
_umount.argtypes = (ctypes.c_char_p,)

_umount2 = libc.umount2
_umount2.restype = ctypes.c_int
_umount2.argtypes = (ctypes.c_char_p, ctypes.c_int)

_unshare = libc.unshare
_unshare.restype = ctypes.c_int
_unshare.argtypes = (ctypes.c_int,)


def c_path(path):
    """Convert a path into a value accepted by the ``c_char_p`` libc parameters.

    ``None`` is passed through, a ``PosixPath`` is turned into ``str`` and a
    ``str`` is encoded to bytes; any other value is returned unchanged.
    """
    if path is None:
        return path
    if isinstance(path, pathlib.PosixPath):
        path = path.as_posix()
    if isinstance(path, str):
        path = path.encode()
    return path


def c_error():
    """Return an ``OSError`` built from the errno of the last ctypes call."""
    code = ctypes.get_errno()
    return OSError(code, os.strerror(code))


def unshare(flags):
    """Call unshare(2) with *flags* (a combination of the ``CLONE_*`` constants).

    :raises OSError: If the call failed with nonzero return code.
    """
    if _unshare(flags) != 0:
        raise c_error()


def pivot_root(new_root, put_old):
    """Call the pivot_root syscall through ``libc.syscall``.

    :param new_root: Path that becomes the new root.
    :param put_old: Path where the previous root gets attached.
    :raises OSError: If the call failed with nonzero return code.
    """
    if libc.syscall(SYS_pivot_root, c_path(new_root), c_path(put_old)) != 0:
        raise c_error()


def mount(
    source: str,
    target: str,
    fstype: str,
    flags: int = 0,
    data: str = None,
):
    """Mount filesystem.

    :param source: Device/source to mount.
    :param target: Mountpoint.
    :param fstype: Filesystem type. Available filesystem types can be found in
        /proc/filesystems.
    :param flags: Mount flags.
    :param data: Mount options for specified filesystem.
    :raises OSError: If mount call failed with nonzero return code.
    """
    c_fstype = fstype.encode() if fstype is not None else fstype
    c_data = data.encode() if data is not None else data
    if _mount(c_path(source), c_path(target), c_fstype, int(flags), c_data) != 0:
        raise c_error()


def bind_mount(
    source: str,
    target: str,
    write: bool = False,
):
    """Bind mount *source* at *target*.

    :param source: Existing path to bind.
    :param target: Mountpoint.
    :param write: When false, ``MountFlag.RDONLY`` is added to the mount flags.
    :raises OSError: If mount call failed with nonzero return code.
    """
    # NOTE(review): mount(2) documents that flags other than MS_REC are ignored
    # when a bind mount is created, so RDONLY/NOSUID/NODEV given here presumably
    # have no effect without a follow-up MS_REMOUNT | MS_BIND call - verify
    # whether "ro_bind" mounts are really read-only before relying on it.
    return mount(
        source,
        target,
        None,
        (
            MountFlag.BIND
            | (0 if write else MountFlag.RDONLY)
            | MountFlag.NOSUID
            | MountFlag.NODEV
        ),
    )


def umount(target: str):
    """Unmount filesystem.

    :param target: Mountpoint.
    :raises OSError: If umount call failed with nonzero return code.
    """
    if _umount(c_path(target)) != 0:
        raise c_error()


def lazy_umount(target):
    """Unmount *target*, falling back to a detached (``MNT_DETACH``) unmount.

    :param target: Mountpoint.
    :raises OSError: If both the plain and the detached unmount failed.
    """
    target = c_path(target)
    if _umount(target) != 0:
        if _umount2(target, MNT_DETACH) != 0:
            raise c_error()


def _unescape_mountinfo(field):
    """Decode the ``\\NNN`` octal escapes used in mountinfo path fields."""
    if '\\' not in field:
        return field
    out = []
    pos = 0
    while pos < len(field):
        digits = field[pos + 1 : pos + 4]
        if (
            field[pos] == '\\'
            and len(digits) == 3
            and all(c in '01234567' for c in digits)
        ):
            out.append(chr(int(digits, 8)))
            pos += 4
        else:
            out.append(field[pos])
            pos += 1
    return ''.join(out)


@dataclasses.dataclass(frozen=True)
class MountInfo:
    """The first five fields of one mountinfo record."""

    # mount ID
    id: int
    # ID of the parent mount
    parent: int
    # (major, minor) device numbers
    dev: tuple
    # root of the mount within its filesystem
    root: str
    # mount point path
    mountpoint: str

    def __post_init__(self):
        assert isinstance(self.id, int)
        assert isinstance(self.parent, int)
        assert isinstance(self.dev, tuple)
        major, minor = self.dev
        assert isinstance(major, int)
        assert isinstance(minor, int)
        assert isinstance(self.root, str)
        assert self.root[0] == '/'
        assert isinstance(self.mountpoint, str)
        assert self.mountpoint[0] == '/'

    @classmethod
    def from_line(cls, line):
        """Parse one line of a mountinfo file; fields after the fifth are ignored."""
        rec = line.split(maxsplit=5)
        major, minor = rec[2].split(':')
        return cls(
            id=int(rec[0]),
            parent=int(rec[1]),
            dev=(int(major), int(minor)),
            # paths containing whitespace or backslashes are octal-escaped
            root=_unescape_mountinfo(rec[3]),
            mountpoint=_unescape_mountinfo(rec[4]),
        )


def parse_mountinfo(mountinfo_path='/proc/self/mountinfo'):
    """Read a mountinfo file.

    :param mountinfo_path: File to parse.
    :returns: Tuple ``(root_id, mountinfo)`` where ``root_id`` is the ID of the
        mount whose mountpoint is ``/`` and ``mountinfo`` maps mount ID to
        :class:`MountInfo`.
    """
    root_id = None
    mountinfo = {}
    with open(mountinfo_path, 'rt') as f:
        for line in f:
            mi = MountInfo.from_line(line)
            if mi.mountpoint == '/':
                assert root_id is None
                root_id = mi.id
            assert mi.id not in mountinfo
            mountinfo[mi.id] = mi
    assert root_id is not None
    return (root_id, mountinfo)


def umount_order(mount_id, mountinfo):
    """Yield the mount *mount_id* and all its descendants, children first.

    :param mount_id: ID of the mount to start from.
    :param mountinfo: Mapping of mount ID to :class:`MountInfo`.
    """
    for mi in mountinfo.values():
        # A mount that names itself as parent must not be treated as its own
        # child, otherwise the recursion never ends.
        if mi.parent == mount_id and mi.id != mount_id:
            yield from umount_order(mi.id, mountinfo)
    yield mountinfo[mount_id]


def pivot_and_umount(new_root, put_old, umount_list):
    """Pivot into *new_root* and unmount the listed mountpoints of the old root.

    :param new_root: Directory that becomes ``/``.
    :param put_old: Directory below *new_root* receiving the old root.
    :param umount_list: Mountpoints (as seen before the pivot) to unmount, in
        the order they should be unmounted.
    """
    mtp_prefix = '/' + put_old.relative_to(new_root).as_posix()
    pivot_root(new_root, put_old)
    os.chdir('/')  # so we don't stand in the old root
    for mtp in umount_list:
        lazy_umount(mtp_prefix + mtp)


def nonblock_cloexec(fd):
    """Set ``O_NONBLOCK`` and ``FD_CLOEXEC`` on *fd*.

    :returns: Result of the final ``fcntl`` call.
    """
    # O_NONBLOCK is a file status flag and has to go through F_SETFL;
    # F_SETFD only handles descriptor flags such as FD_CLOEXEC.
    status_flags = fcntl.fcntl(fd, fcntl.F_GETFL)
    fcntl.fcntl(fd, fcntl.F_SETFL, status_flags | os.O_NONBLOCK)
    fd_flags = fcntl.fcntl(fd, fcntl.F_GETFD)
    return fcntl.fcntl(fd, fcntl.F_SETFD, fd_flags | fcntl.FD_CLOEXEC)


def exit_status(status):
    """Exit with a code derived from a ``waitpid`` status.

    Death by signal exits with ``128 + signal``; exit codes of 128 and above
    are reported as 128; other exit codes are passed through.

    :raises SystemExit: Always.
    """
    # Low 7 bits hold the terminating signal; bit 0x80 is the core dump flag
    # and must not leak into the signal number.
    sig = status & 0x7F
    ret = status >> 8
    if sig:
        raise SystemExit(128 + sig)
    if ret >= 128:
        raise SystemExit(128)
    raise SystemExit(ret)


def exec_command(argv):
    """Replace the current process with the command *argv*.

    An ``argv[0]`` starting with ``/`` is executed directly, anything else is
    looked up in the directories of ``PATH``.

    :raises SystemExit: With code 127 when the command was not found in PATH.
    """
    if argv[0][0] == '/':
        os.execv(argv[0], argv)
    # fall back to the system default search path when PATH is not set
    for d in os.environ.get('PATH', os.defpath).split(':'):
        try:
            os.execv(os.path.join(d, argv[0]), argv)
        except (FileNotFoundError, NotADirectoryError):
            # missing command or a PATH entry that is not a directory
            continue
    raise SystemExit(127)


def map_uid_gid(orig_uid, orig_gid):
    """Map *orig_uid* and *orig_gid* onto themselves in the current user namespace.

    Writes ``uid_map``, ``setgroups`` (``deny``) and ``gid_map`` of the current
    process and then sets the process uid and gid to the mapped values.
    """
    with open('/proc/self/uid_map', 'wt') as f:
        f.write(f'{orig_uid} {orig_uid} 1\n')

    with open('/proc/self/setgroups', 'wt') as f:
        f.write('deny\n')

    with open('/proc/self/gid_map', 'wt') as f:
        f.write(f'{orig_gid} {orig_gid} 1\n')

    os.setuid(orig_uid)
    os.setgid(orig_gid)


def pidns_run(unshare_flags, run_pid1=True):
    """Unshare into a new PID namespace and continue in a forked child.

    The calling process waits for the child and exits with its status, so this
    function only ever returns in a descendant process.

    :param unshare_flags: Additional ``CLONE_*`` flags for ``unshare``.
    :param run_pid1: When true, the child becomes the waiting PID 1 (see
        :func:`pidns_pid1`) and only its child returns (with ``None``);
        otherwise the child itself returns the read end of the parent pipe.
    """
    (parent_rfd, parent_wfd) = os.pipe()
    nonblock_cloexec(parent_rfd)
    nonblock_cloexec(parent_wfd)
    orig_uid = os.getuid()
    orig_gid = os.getgid()
    unshare(CLONE_NEWPID | unshare_flags)
    if unshare_flags & CLONE_NEWUSER:
        map_uid_gid(orig_uid, orig_gid)
    fork_pid = os.fork()
    if fork_pid == 0:
        # child
        assert os.getpid() == 1
        os.close(parent_wfd)
        if run_pid1:
            return pidns_pid1(parent_rfd)
        else:
            return parent_rfd
    else:
        # parent
        os.close(parent_rfd)
        (pid, status) = os.waitpid(fork_pid, 0)
        exit_status(status)


def pidns_pid1(parent_rfd):
    """Fork the payload process and act as the reaping PID 1 for it.

    Returns (``None``) only in the forked child. The calling process loops
    reaping children and exits through :func:`exit_status` once the forked
    child has terminated.

    :param parent_rfd: Read end of the pipe whose write end is held by the
        process outside of the PID namespace.
    """
    fork2_pid = os.fork()
    if fork2_pid == 0:
        # child
        return
    # parent
    rlist = (parent_rfd,)
    while True:
        # -1 rather than 0: children that moved to another process group
        # (including orphans reparented to us) have to be reaped as well.
        (pid, status) = os.waitpid(-1, os.WNOHANG)
        if pid == fork2_pid:
            exit_status(status)
        if pid != 0:
            # reaped some other child, look for more right away
            continue
        ready = ()
        try:
            ready, _, _ = select.select(rlist, (), (), 1.0)
        except OSError as e:
            # We might get interrupted by SIGCHLD here
            if e.errno != errno.EINTR:
                raise
        if ready:
            # Nothing is ever written to the pipe, so readiness means EOF; stop
            # watching it, otherwise select() returns at once and we spin.
            # NOTE(review): possibly PID 1 is meant to react to the outer
            # process going away - confirm the intended behaviour.
            rlist = ()


@dataclasses.dataclass(frozen=True)
class MountTMPFS:
    """A tmpfs to be mounted at *path* (relative to the sandbox root)."""

    path: pathlib.PosixPath

    def __post_init__(self):
        assert isinstance(self.path, pathlib.PosixPath)
        assert not self.path.is_absolute()

    def mount(self, root):
        """Create the mountpoint below *root* and mount a tmpfs on it."""
        dst = root / self.path
        dst.mkdir(parents=True, exist_ok=True)
        mount('tmpfs', dst, 'tmpfs', MountFlag.NOSUID | MountFlag.NODEV)


@dataclasses.dataclass(frozen=True)
class MountBind:
    """A bind mount of absolute *src* at *dst* (relative to the sandbox root)."""

    src: pathlib.PosixPath
    dst: pathlib.PosixPath
    write: bool = False

    def __post_init__(self):
        assert isinstance(self.src, pathlib.PosixPath)
        assert self.src.is_absolute()
        assert isinstance(self.dst, pathlib.PosixPath)
        assert not self.dst.is_absolute()

    def mount(self, root):
        """Bind mount ``src`` below *root*.

        The mountpoint is created only when ``src`` is a directory.
        """
        dst = root / self.dst
        if self.src.is_dir():
            dst.mkdir(parents=True, exist_ok=True)
        bind_mount(self.src, dst, self.write)


def relpath(s):
    """Return *s* as a ``PosixPath`` made relative to ``/`` when it is absolute."""
    p = pathlib.PosixPath(s)
    return p.relative_to('/') if p.is_absolute() else p


def parse_mount(s):
    """Parse an ``--extra-mount`` specification.

    Accepted forms are ``tmpfs:PATH``, ``rw_bind:SRC:DST`` and
    ``ro_bind:SRC:DST``.

    :raises ValueError: For an unknown mount type or a malformed specification.
    """
    m_type, rest = s.split(':', maxsplit=1)
    if m_type == 'tmpfs':
        return MountTMPFS(relpath(rest))
    elif m_type in ('rw_bind', 'ro_bind'):
        write = m_type == 'rw_bind'
        src, dst = rest.split(':', maxsplit=1)
        return MountBind(pathlib.PosixPath(src), relpath(dst), write)
    raise ValueError(m_type)


@dataclasses.dataclass(frozen=True)
class Settings:
    """Validated sandbox configuration."""

    # versions directory, bind mounted into the sandbox at the same path
    versions: pathlib.PosixPath
    # directory that becomes the root of the sandbox
    root: pathlib.PosixPath
    # working directory inside of the sandbox
    chdir: pathlib.PosixPath
    # content of the vars file (empty when none was given)
    vars: dict
    # command to execute inside of the sandbox
    command: tuple
    # additional MountTMPFS / MountBind mounts
    extra_mount: tuple
    # (uid, gid) to switch to in root mode
    drop_to: tuple = None
    # tar archive extracted into a fresh tmpfs root
    untar: pathlib.PosixPath = None

    def __post_init__(self):
        assert isinstance(self.command, tuple)
        assert all(isinstance(arg, (str, bytes)) for arg in self.command)

        assert isinstance(self.extra_mount, tuple)
        assert all(isinstance(arg, (MountTMPFS, MountBind)) for arg in self.extra_mount)

        assert isinstance(self.chdir, pathlib.PosixPath)
        assert self.chdir.is_absolute()

        assert isinstance(self.versions, pathlib.PosixPath)
        assert self.versions.is_absolute()
        assert self.versions.is_dir()

        if self.drop_to is not None:
            assert isinstance(self.drop_to, tuple)
            uid, gid = self.drop_to
            assert isinstance(uid, int)
            assert isinstance(gid, int)

        assert isinstance(self.untar, (pathlib.PosixPath, type(None)))

        assert isinstance(self.root, pathlib.PosixPath)
        assert self.root.is_absolute()
        assert self.root.is_dir(), self.root
        if self.untar is None:
            # without an archive the root has to be populated already
            self._check_root()

    def _check_root(self):
        """Assert that the root directory contains the entries the sandbox needs."""
        assert (self.root / 'oldroot').is_dir()
        assert (self.root / 'proc').is_dir()
        assert (self.root / 'dev').is_dir()
        assert (self.root / 'bin').is_dir()
        assert (self.root / 'bin/sh').exists()

    @classmethod
    def from_args_and_env(cls, args, env):
        """Build settings from parsed command line arguments and the environment.

        :param args: Namespace produced by ``argument_parser``.
        :param env: Mapping providing ``pthbs_uid`` and ``pthbs_gid`` (read in
            ``root`` mode only).
        """
        if args.vars:
            import yaml

            with args.vars.open('rt') as f:
                v = yaml.safe_load(f)
        else:
            v = {}

        return cls(
            versions=(args.versions or pathlib.PosixPath(v['versions'])),
            root=args.root_dir,
            chdir=args.chdir,
            vars=v,
            command=tuple(args.command),
            extra_mount=tuple(args.extra_mount) if args.extra_mount is not None else (),
            drop_to=(
                (int(env['pthbs_uid']), int(env['pthbs_gid']))
                if args.mode == 'root'
                else None
            ),
            untar=args.untar and pathlib.PosixPath(args.untar),
        )


def userns_sandbox_run(settings):
    """Populate the sandbox root, chroot into it and exec the command.

    Used in ``userns`` mode; does not return.
    """
    assert settings.untar is None
    assert settings.drop_to is None
    mount('proc', settings.root / 'proc', 'proc', MountFlag.NOSUID | MountFlag.NODEV)
    if not (settings.root / 'dev/null').is_char_device():
        # no usable /dev in the root: bind the current /dev instead
        mount(
            '/dev',
            settings.root / 'dev',
            None,
            (MountFlag.BIND | MountFlag.NOSUID | MountFlag.REC),
        )

    mountpoints = [
        MountTMPFS(relpath('/dev/shm')),
    ]
    mountpoints.extend(settings.extra_mount)
    mountpoints.append(MountBind(settings.versions, settings.versions.relative_to('/')))
    for m in mountpoints:
        m.mount(settings.root)

    os.chroot(str(settings.root))
    os.chdir(settings.chdir)
    exec_command(settings.command)


def mkchardev(path, major, minor, mode):
    """Create a character device node at *path*."""
    if isinstance(path, pathlib.PosixPath):
        path = path.as_posix()
    os.mknod(
        path,
        mode=mode | stat.S_IFCHR,
        device=os.makedev(major, minor),
    )


def mkblockdev(path, major, minor, mode):
    """Create a block device node at *path*."""
    if isinstance(path, pathlib.PosixPath):
        path = path.as_posix()
    os.mknod(
        path,
        mode=mode | stat.S_IFBLK,
        device=os.makedev(major, minor),
    )


def mknod_dev(dev):
    """Create a basic set of device nodes and symlinks in the directory *dev*."""
    mkchardev(mode=0o666, major=1, minor=3, path=dev / "null")
    mkchardev(mode=0o666, major=1, minor=7, path=dev / "full")
    mkchardev(mode=0o666, major=5, minor=2, path=dev / "ptmx")
    mkchardev(mode=0o644, major=1, minor=8, path=dev / "random")
    mkchardev(mode=0o644, major=1, minor=9, path=dev / "urandom")
    mkchardev(mode=0o666, major=1, minor=5, path=dev / "zero")
    mkchardev(mode=0o666, major=5, minor=0, path=dev / "tty")
    (dev / "fd").symlink_to("/proc/self/fd")
    (dev / "stdin").symlink_to("/proc/self/fd/0")
    (dev / "stdout").symlink_to("/proc/self/fd/1")
    (dev / "stderr").symlink_to("/proc/self/fd/2")


def root_sandbox_setup(settings):
    """Build the sandbox root, pivot into it and drop to the configured uid/gid.

    Used in ``root`` mode. With ``settings.untar`` the root is a fresh tmpfs
    filled from the archive and remounted read-only once all mounts are done.
    """
    uid, gid = settings.drop_to
    os.umask(0)
    # collected before anything is mounted so only pre-existing mounts are listed
    to_umount = [mi.mountpoint for mi in umount_order(*parse_mountinfo())]
    r = settings.root
    if settings.untar:
        mount('sandbox_root', r, 'tmpfs', MountFlag.NOSUID)
        (r / 'oldroot').mkdir()
        subprocess.check_call(
            ('/bin/tar', 'xpf', settings.untar.absolute()),
            shell=False,
            cwd=r,
        )
    mount('proc', r / 'proc', 'proc', MountFlag.NOSUID | MountFlag.NODEV)
    if not (r / 'dev/null').is_char_device():
        mknod_dev(r / 'dev')

    mountpoints = [
        MountTMPFS(relpath('/dev/shm')),
    ]
    mountpoints.extend(settings.extra_mount)
    mountpoints.append(MountBind(settings.versions, settings.versions.relative_to('/')))
    for m in mountpoints:
        m.mount(r)

    if settings.untar:
        mount(
            'tmpfs',
            r,
            '',
            (MountFlag.REMOUNT | MountFlag.RDONLY | MountFlag.NOSUID),
        )
    pivot_and_umount(r, r / 'oldroot', to_umount)
    # group first: after setuid we may no longer be allowed to change the gid
    os.setgid(gid)
    os.setuid(uid)
    os.chdir(settings.chdir)


def main(args, env):
    """Run the command from *args* inside a sandbox of the selected mode.

    :param args: Namespace produced by ``argument_parser``.
    :param env: Environment mapping (see :meth:`Settings.from_args_and_env`).
    """
    settings = Settings.from_args_and_env(args, env)
    if args.mode == 'userns':
        pidns_run(
            CLONE_NEWUSER | CLONE_NEWNS | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWPID,
        )
        userns_sandbox_run(settings)
    else:
        pidns_run(
            CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWPID,
        )
        unshare(CLONE_NEWNS)
        root_sandbox_setup(settings)
        os.umask(0o022)
        exec_command(settings.command)


argument_parser = argparse.ArgumentParser(
    description="Linux namespaces based sandbox for pthbs",
    allow_abbrev=False,
)
argument_parser.add_argument(
    '--mode',
    '-m',
    required=True,
    choices=('userns', 'root'),
    help="sandbox mode",
)
argument_parser.add_argument(
    '--vars',
    '-y',
    type=pathlib.PosixPath,
    help="vars.yaml to read configuration from",
)
argument_parser.add_argument(
    '--versions',
    '-V',
    type=pathlib.PosixPath,
    help="versions dir (e.g. /versions)",
)
argument_parser.add_argument(
    '--chdir',
    '-C',
    type=pathlib.PosixPath,
    default=pathlib.PosixPath('/'),
    help="set working directory inside sandbox",
)
# No default: an unset --untar has to stay None, userns mode asserts on it and
# root mode uses it to decide whether to build the root from an archive.
argument_parser.add_argument(
    '--untar',
    '-f',
    type=pathlib.PosixPath,
    default=None,
    help="initial structure for build tmpfs",
)
argument_parser.add_argument('--extra-mount', action='append', type=parse_mount)
argument_parser.add_argument('root_dir', type=pathlib.PosixPath)
argument_parser.add_argument('command', nargs='+')


if __name__ == '__main__':
    args = argument_parser.parse_args()
    main(args, os.environ)

# pylama:linters=pycodestyle,pyflakes:ignore=D212,D203,D100,D101,D102,D107
# vim: sts=4 ts=4 sw=4 et tw=88 efm=%A%f\:%l\:%c\ %t%n\ %m