commit 4d6cedce86a3c62d2013529b0e79284c09ec061b
parent fb50d2d0714f9efbc59b783d0159cedba1122e79
Author: Jan Pobříslo <ccx@te2000.cz>
Date: Thu, 22 Feb 2024 03:54:02 +0100
Prototype namespace-based sandbox
Diffstat:
2 files changed, 375 insertions(+), 1 deletion(-)
diff --git a/command/pthbs-build b/command/pthbs-build
@@ -139,7 +139,12 @@ function at_filehash(hash_type, file_hash, dst, dstdir){
sandbox_cmd=sandbox_cmd " -m " q("allow/read+/bin/***")
sandbox_cmd=sandbox_cmd " -munshare/net:1 -munshare/ipc:1"
} else {
- fatal("set $PTHBS_SYD to enable sandboxing")
+ sandbox_cmd=" "q(ENVIRON["basedir"]"/userns_sandbox.py")
+ sandbox_cmd=sandbox_cmd" --vars="q(ENVIRON["basedir"]"/vars.yaml")
+ sandbox_cmd=sandbox_cmd" --extra-mount=tmpfs:"q(ENVIRON["basedir"]"/work")
+ sandbox_cmd=sandbox_cmd" --extra-mount=rw_bind:"q(ENVIRON["workdir"])
+ sandbox_cmd=sandbox_cmd" "q(ENVIRON["basedir"]"/work/root")
+ sandbox_cmd=sandbox_cmd" env_ignored"
}
} else {
sandbox_cmd=""
diff --git a/userns_sandbox.py b/userns_sandbox.py
@@ -0,0 +1,369 @@
+#!/usr/bin/env python3
+import sys
+import os
+import os.path
+import ctypes
+import fcntl
+import select
+import errno
+import argparse
+import enum
+import pathlib
+import dataclasses
+
+# Handle on the C library already loaded into this process.  use_errno=True
+# makes ctypes keep a private errno copy readable through ctypes.get_errno().
+libc = ctypes.CDLL(None, use_errno=True)
+
+# Namespace selectors accepted by unshare(2), listed by ascending bit value.
+CLONE_NEWTIME = 0x0000_0080  # time namespace
+CLONE_NEWNS = 0x0002_0000  # mount namespace
+CLONE_NEWCGROUP = 0x0200_0000  # cgroup namespace
+CLONE_NEWUTS = 0x0400_0000  # utsname (hostname) namespace
+CLONE_NEWIPC = 0x0800_0000  # ipc namespace
+CLONE_NEWUSER = 0x1000_0000  # user namespace
+CLONE_NEWPID = 0x2000_0000  # pid namespace
+CLONE_NEWNET = 0x4000_0000  # network namespace
+
+# Raw syscall number for pivot_root.
+# NOTE(review): 155 is presumably the x86-64 number; confirm before using it
+# on any other architecture.
+SYS_pivot_root = 155
+
+class MountFlag(enum.IntFlag):
+    """Flag bits for the ``mountflags`` argument of mount(2).
+
+    Declared as :class:`enum.IntFlag` so that OR-ing members together yields
+    another ``MountFlag`` (with a readable repr) instead of degrading to a
+    bare ``int``.  Members still compare equal to, and convert to, their
+    integer value.
+    """
+
+    #: Mount read-only.
+    RDONLY = 1
+    #: Ignore suid and sgid bits.
+    NOSUID = 2
+    #: Disallow access to device special files.
+    NODEV = 4
+    #: Disallow program execution.
+    NOEXEC = 8
+    #: Writes are synced at once.
+    SYNCHRONOUS = 16
+    #: Alter flags of a mounted FS.
+    REMOUNT = 32
+    #: Allow mandatory locks on an FS.
+    MANDLOCK = 64
+    #: Directory modifications are synchronous.
+    DIRSYNC = 128
+    #: Do not follow symlinks.
+    NOSYMFOLLOW = 256
+    #: Do not update access times.
+    NOATIME = 1024
+    #: Do not update directory access times.
+    NODIRATIME = 2048
+    #: Bind directory at different place.
+    BIND = 4096
+    #: Move an existing mount to a new location.
+    MOVE = 8192
+    #: Apply the operation recursively to submounts (used with BIND and
+    #: the propagation-type flags).
+    REC = 16384
+    #: Suppress certain kernel log messages.
+    SILENT = 32768
+    #: VFS does not apply the umask.
+    POSIXACL = 1 << 16
+    #: Change to unbindable.
+    UNBINDABLE = 1 << 17
+    #: Change to private.
+    PRIVATE = 1 << 18
+    #: Change to slave.
+    SLAVE = 1 << 19
+    #: Change to shared.
+    SHARED = 1 << 20
+    #: Update atime relative to mtime/ctime.
+    RELATIME = 1 << 21
+    #: This is a kern_mount call.
+    KERNMOUNT = 1 << 22
+    #: Update inode I_version field.
+    I_VERSION = 1 << 23
+    #: Always perform atime updates.
+    STRICTATIME = 1 << 24
+    #: Update the on-disk [acm]times lazily.
+    LAZYTIME = 1 << 25
+    #: Kernel-internal flag.
+    ACTIVE = 1 << 30
+    #: Kernel-internal flag.
+    NOUSER = 1 << 31
+
+# ctypes prototypes for the libc entry points used by this module.  The types
+# are referenced through the ``ctypes`` module because only the module itself
+# is imported, and ``argtypes`` must be a real sequence: ``(x)`` is just ``x``,
+# a one-element tuple needs the trailing comma.
+
+# int mount(const char *source, const char *target, const char *fstype,
+#           unsigned long mountflags, const void *data)
+_mount = libc.mount
+_mount.restype = ctypes.c_int
+_mount.argtypes = (
+    ctypes.c_char_p,
+    ctypes.c_char_p,
+    ctypes.c_char_p,
+    ctypes.c_ulong,
+    ctypes.c_void_p,
+)
+
+# int umount(const char *target)
+_umount = libc.umount
+_umount.restype = ctypes.c_int
+_umount.argtypes = (ctypes.c_char_p,)
+
+# int chroot(const char *path)
+_chroot = libc.chroot
+_chroot.restype = ctypes.c_int
+_chroot.argtypes = (ctypes.c_char_p,)
+
+
+def c_path(path):
+    """Convert a filesystem path to the ``bytes`` form passed to libc.
+
+    :param path: ``str``, ``bytes`` or any ``os.PathLike`` object (for
+        example ``pathlib.PosixPath``).
+    :return: ``bytes`` for ``str``/path-like input; any other value
+        (``bytes``, ``None``) is returned unchanged.
+    """
+    # os.fsencode() accepts both str and PathLike and applies the filesystem
+    # encoding, so paths containing undecodable bytes survive the round trip.
+    if isinstance(path, (str, os.PathLike)):
+        return os.fsencode(path)
+    return path
+
+
+def mount(
+    source: str | os.PathLike,
+    target: str | os.PathLike,
+    fstype: str,
+    flags: int | MountFlag = 0,
+    data: str | None = None,
+):
+    """Mount filesystem.
+
+    :param source: Device/source to mount.
+    :param target: Mountpoint.
+    :param fstype: Filesystem type. Available filesystem types can be found in /proc/filesystems.
+    :param flags: Mount flags.
+    :param data: Mount options for specified filesystem.
+    :raises OSError: If mount call failed with nonzero return code.
+    """
+    # source/target may be str or pathlib paths; c_path() turns either into
+    # bytes.  (Passing the bound method ``source.encode`` here, as before,
+    # handed a method object to ctypes instead of the path.)
+    result = _mount(
+        c_path(source),
+        c_path(target),
+        fstype.encode(),
+        int(flags),
+        data.encode() if data is not None else None,
+    )
+    if result != 0:
+        # libc was loaded with use_errno=True, so errno lives in ctypes.
+        err = ctypes.get_errno()
+        raise OSError(err, os.strerror(err), str(target))
+
+
+def bind_mount(
+    source: str,
+    target: str,
+    write: bool = False,
+):
+    """Bind-mount ``source`` at ``target`` with nosuid/nodev, read-only by default.
+
+    :param source: Existing file or directory to expose.
+    :param target: Mountpoint; must already exist.
+    :param write: Keep the bind mount writable when true.
+    :raises OSError: If either mount call fails.
+    """
+    flags = (
+        MountFlag.BIND
+        | (0 if write else MountFlag.RDONLY)
+        | MountFlag.NOSUID
+        | MountFlag.NODEV
+    )
+    mount(source, target, "", flags)
+    # When creating a bind mount the kernel ignores every flag except
+    # MS_REC, so the new mount inherits the options of the underlying one.
+    # RDONLY/NOSUID/NODEV only take effect through a follow-up bind-remount.
+    # NOTE(review): inside a user namespace this remount fails with EPERM if
+    # the source mount carries other locked flags (e.g. noexec) that are not
+    # repeated here.
+    mount(source, target, "", flags | MountFlag.REMOUNT)
+
+
+def umount(target: str | os.PathLike):
+    """Unmount filesystem.
+
+    :param target: Mountpoint.
+    :raises OSError: If umount call failed with nonzero return code.
+    """
+    if _umount(c_path(target)) != 0:
+        # libc was loaded with use_errno=True, so errno lives in ctypes.
+        err = ctypes.get_errno()
+        raise OSError(err, os.strerror(err), str(target))
+
+
+def parse_mountinfo(mountinfo_path='/proc/self/mountinfo'):
+    """Placeholder for a mountinfo parser; not implemented yet.
+
+    :param mountinfo_path: Path of the mountinfo file (currently unused).
+    :raises NotImplementedError: Always.
+    """
+    raise NotImplementedError
+
+
+def recursive_umount(mount_id, mountinfo):
+    """Placeholder for unmounting a mount and its submounts; not implemented yet.
+
+    :param mount_id: Currently unused.
+    :param mountinfo: Currently unused.
+    :raises NotImplementedError: Always.
+    """
+    raise NotImplementedError
+
+
+def nonblock_cloexec(fd):
+    """Make ``fd`` non-blocking and close-on-exec.
+
+    :param fd: Open file descriptor.
+    :return: Result of the final ``fcntl`` call (0 on success).
+    :raises OSError: If any ``fcntl`` call fails.
+    """
+    # O_NONBLOCK is a file *status* flag and must go through F_SETFL;
+    # F_SETFD only understands FD_CLOEXEC and silently ignores other bits.
+    status_flags = fcntl.fcntl(fd, fcntl.F_GETFL)
+    fcntl.fcntl(fd, fcntl.F_SETFL, status_flags | os.O_NONBLOCK)
+    descriptor_flags = fcntl.fcntl(fd, fcntl.F_GETFD)
+    return fcntl.fcntl(fd, fcntl.F_SETFD, descriptor_flags | fcntl.FD_CLOEXEC)
+
+
+def exit_status(status):
+    """Exit the current process with a code derived from a wait status.
+
+    :param status: Raw wait status as returned by ``os.waitpid``.
+    :raises SystemExit: Always.  ``128 + signal`` if the child was killed by
+        a signal; otherwise the child's exit code, with codes of 128 and
+        above collapsed to 128.
+    """
+    # The low 7 bits hold the terminating signal; bit 0x80 is the core-dump
+    # flag and must not leak into the signal number.
+    sig = status & 0x7f
+    ret = status >> 8
+    if sig:
+        raise SystemExit(128 + sig)
+    if ret >= 128:
+        # NOTE(review): presumably clamped so a plain exit code can never be
+        # mistaken for a "killed by signal" code; confirm the intent.
+        raise SystemExit(128)
+    raise SystemExit(ret)
+
+
+def exec_command(argv):
+    """Replace the current process with ``argv``, searching ``PATH`` if needed.
+
+    :param argv: Command and arguments; ``argv[0]`` is the program.
+    :raises SystemExit: With code 127 if the program is not found in any
+        search directory.
+    :raises OSError: If ``execv`` fails for another reason, or if a program
+        given with an explicit path cannot be executed.
+    """
+    program = argv[0]
+    # A name containing a slash is a path and is never looked up in PATH.
+    if '/' in program:
+        os.execv(program, argv)
+    # os.get_exec_path() falls back to os.defpath when PATH is unset instead
+    # of raising KeyError.
+    for directory in os.get_exec_path():
+        try:
+            os.execv(os.path.join(directory, program), argv)
+        except (FileNotFoundError, NotADirectoryError):
+            # Missing here, or the PATH entry is not a directory: try the next.
+            continue
+    raise SystemExit(127)
+
+
+def pidns_run(unshare_flags, continuation, *args, **kwargs):
+    """Run ``continuation(*args, **kwargs)`` inside a new PID namespace.
+
+    Three processes are involved:
+
+    * the caller unshares the namespaces, forks, waits for its child and
+      exits with that child's status;
+    * the first child is PID 1 of the new namespace: it forks the worker,
+      reaps children and exits with the worker's status;
+    * the worker calls ``continuation`` and, if that returns, returns
+      normally to the caller of ``pidns_run``.
+
+    A close-on-exec pipe whose write end is held only by the original
+    process lets PID 1 notice when that process dies.
+
+    :param unshare_flags: ``CLONE_*`` flags to unshare besides ``CLONE_NEWPID``.
+    :param continuation: Callable executed in the worker process.
+    :raises OSError: If ``unshare`` fails.
+    :raises SystemExit: In the original process and in PID 1, carrying the
+        propagated exit code.
+    """
+    (parent_rfd, parent_wfd) = os.pipe()
+    nonblock_cloexec(parent_rfd)
+    nonblock_cloexec(parent_wfd)
+    if libc.unshare(CLONE_NEWPID | unshare_flags) != 0:
+        err = ctypes.get_errno()
+        raise OSError(err, os.strerror(err))
+    fork_pid = os.fork()
+    if fork_pid != 0:
+        # Original process: keep the write end open for as long as we live
+        # and mirror the namespace init's exit status.
+        os.close(parent_rfd)
+        (pid, status) = os.waitpid(fork_pid, 0)
+        exit_status(status)
+
+    # First child: init of the new PID namespace.
+    assert os.getpid() == 1
+    os.close(parent_wfd)
+    fork2_pid = os.fork()
+    if fork2_pid == 0:
+        # Worker process.
+        continuation(*args, **kwargs)
+        return
+
+    while True:
+        # Reap everything that has already exited, including orphans that
+        # were reparented to us, rather than one child per wake-up.
+        while True:
+            (pid, status) = os.waitpid(-1, os.WNOHANG)
+            if pid == 0:
+                break
+            if pid == fork2_pid:
+                exit_status(status)
+        # Nothing is ever written to the pipe, so "readable" means EOF: the
+        # original process is gone.  EINTR needs no handling here because
+        # select() is retried automatically (PEP 475).
+        readable, _, _ = select.select((parent_rfd,), (), (), 1.0)
+        if readable:
+            # Exiting PID 1 makes the kernel kill the rest of the namespace.
+            # Ignoring the event, as before, left this loop spinning.
+            raise SystemExit(1)
+
+
+@dataclasses.dataclass(frozen=True, slots=True)
+class MountTMPFS:
+    """A tmpfs to be mounted at ``path`` below the sandbox root."""
+
+    # Mountpoint relative to the sandbox root.
+    path: pathlib.PosixPath
+
+    def __post_init__(self):
+        assert isinstance(self.path, pathlib.PosixPath)
+        assert not self.path.is_absolute()
+
+    def mount(self, root):
+        """Create the mountpoint under ``root`` if needed and mount the tmpfs.
+
+        :param root: Sandbox root directory (a ``pathlib`` path).
+        :raises OSError: If creating the directory or mounting fails.
+        """
+        dst = root / self.path
+        dst.mkdir(parents=True, exist_ok=True)
+        mount('tmpfs', dst, 'tmpfs', MountFlag.NOSUID | MountFlag.NODEV)
+
+
+@dataclasses.dataclass(frozen=True, slots=True)
+class MountBind:
+    """A bind mount of host path ``src`` at ``dst`` below the sandbox root."""
+
+    # Absolute path on the host.
+    src: pathlib.PosixPath
+    # Mountpoint relative to the sandbox root.
+    dst: pathlib.PosixPath
+    # Writable when true; defaults to read-only, matching bind_mount().
+    write: bool = False
+
+    def __post_init__(self):
+        assert isinstance(self.src, pathlib.PosixPath)
+        assert self.src.is_absolute()
+        assert isinstance(self.dst, pathlib.PosixPath)
+        assert not self.dst.is_absolute()
+
+    def mount(self, root):
+        """Create the mountpoint under ``root`` if needed and bind ``src`` onto it.
+
+        :param root: Sandbox root directory (a ``pathlib`` path).
+        :raises OSError: If creating the mountpoint or mounting fails.
+        """
+        dst = root / self.dst
+        if self.src.is_dir():
+            dst.mkdir(parents=True, exist_ok=True)
+        else:
+            # A non-directory needs an existing file to be bound onto.
+            dst.parent.mkdir(parents=True, exist_ok=True)
+            dst.touch(exist_ok=True)
+        bind_mount(self.src, dst, self.write)
+
+
+def relpath(s):
+    """Return ``s`` as a ``PosixPath`` made relative to ``/``.
+
+    :param s: Path string (or path object).
+    :return: The path without its leading ``/``; input that is already
+        relative is returned as a ``PosixPath`` unchanged.
+    """
+    candidate = pathlib.PosixPath(s)
+    if not candidate.is_absolute():
+        return candidate
+    return candidate.relative_to('/')
+
+
+def parse_mount(s):
+    """Parse one ``--extra-mount`` specification.
+
+    Accepted forms::
+
+        tmpfs:PATH
+        ro_bind:SRC[:DST]
+        rw_bind:SRC[:DST]
+
+    For bind mounts ``DST`` defaults to ``SRC``, i.e. the host path is
+    exposed at the same location inside the sandbox root.
+
+    :param s: Specification string.
+    :return: A ``MountTMPFS`` or ``MountBind`` instance.
+    :raises ValueError: If the type is unknown or the ``:`` separator is missing.
+    """
+    m_type, sep, rest = s.partition(':')
+    if not sep:
+        raise ValueError(f"mount specification must look like TYPE:PATH, got {s!r}")
+    if m_type == 'tmpfs':
+        return MountTMPFS(relpath(rest))
+    if m_type in ('rw_bind', 'ro_bind'):
+        src, _, dst = rest.partition(':')
+        return MountBind(
+            pathlib.PosixPath(src),
+            relpath(dst or src),
+            m_type == 'rw_bind',
+        )
+    raise ValueError(m_type)
+
+
+@dataclasses.dataclass(frozen=True, slots=True)
+class Settings:
+    """Validated configuration for one sandbox run."""
+
+    # Absolute path of the versions directory on the host.
+    versions: pathlib.PosixPath
+    # Absolute path of the prepared sandbox root directory.
+    root: pathlib.PosixPath
+    # Name of a subdirectory of ``versions``.
+    environment: str
+    # Contents of the vars.yaml file ({} when none was given).
+    vars: dict
+    # Command line to execute inside the sandbox.
+    command: tuple
+    # Additional MountTMPFS / MountBind entries.
+    extra_mount: tuple
+
+    def __post_init__(self):
+        assert isinstance(self.command, tuple)
+        assert all(isinstance(arg, (str, bytes)) for arg in self.command)
+
+        assert isinstance(self.extra_mount, tuple)
+        assert all(
+            isinstance(m, (MountTMPFS, MountBind)) for m in self.extra_mount
+        )
+
+        assert isinstance(self.versions, pathlib.PosixPath)
+        assert self.versions.is_absolute()
+        assert self.versions.is_dir()
+        assert (self.versions / self.environment).is_dir()
+
+        self._check_root()
+
+    def _check_root(self):
+        """Assert that ``root`` has the directory skeleton the sandbox needs."""
+        assert isinstance(self.root, pathlib.PosixPath)
+        assert self.root.is_absolute()
+        assert self.root.is_dir()
+        assert (self.root / 'oldroot').is_dir()
+        assert (self.root / 'proc').is_dir()
+        assert (self.root / 'dev').is_dir()
+        assert (self.root / 'bin').is_dir()
+        assert (self.root / 'bin/sh').exists()
+        # NOTE(review): an earlier draft also asserted
+        # ``self.root_environment.is_dir()``, but no such attribute exists on
+        # this slotted dataclass, so the check could only raise
+        # AttributeError.  Reinstate it once the intended path is defined.
+
+    @classmethod
+    def from_args(cls, args):
+        """Build settings from parsed command-line arguments.
+
+        :param args: Namespace with ``vars``, ``versions``, ``root_dir``,
+            ``environment``, ``command`` and ``extra_mount`` attributes.
+        :raises ValueError: If the versions directory is given neither on
+            the command line nor in the vars file.
+        """
+        if args.vars:
+            import yaml
+            with args.vars.open('rt') as f:
+                # An empty YAML document loads as None.
+                v = yaml.safe_load(f) or {}
+        else:
+            v = {}
+
+        versions = args.versions
+        if not versions:
+            if 'versions' not in v:
+                raise ValueError(
+                    "versions directory not given: pass --versions or set"
+                    " 'versions' in the vars file"
+                )
+            versions = pathlib.PosixPath(v['versions'])
+
+        return cls(
+            versions=versions,
+            root=args.root_dir,
+            environment=args.environment,
+            vars=v,
+            command=tuple(args.command),
+            # argparse leaves an unused 'append' option at None.
+            extra_mount=tuple(args.extra_mount or ()),
+        )
+
+
+def sandbox_run(settings, command):
+    """Populate the sandbox root, chroot into it and exec ``command``.
+
+    Mounts a fresh ``proc``, binds the host ``/dev``, applies the extra
+    mounts from ``settings`` followed by a read-only bind of the versions
+    directory at the same path below the root, then changes root and
+    executes the command.
+
+    :param settings: ``Settings`` instance describing the sandbox.
+    :param command: Sequence with the program and its arguments.
+    :raises OSError: If a mount, ``chroot`` or ``exec`` call fails.
+    :raises SystemExit: With code 127 if the command is not found.
+    """
+    mount('proc', settings.root / 'proc', 'proc', MountFlag.NOSUID | MountFlag.NODEV)
+    # /dev normally has submounts (devpts, shm, ...).  Mounts inherited from a
+    # more privileged namespace are locked together, so a non-recursive bind
+    # is refused with EINVAL inside a user namespace; REC binds the subtree.
+    mount(
+        '/dev',
+        settings.root / 'dev',
+        "",
+        MountFlag.BIND | MountFlag.REC | MountFlag.NOSUID,
+    )
+    # The extra mounts belong to the settings; ``command`` is only the argv.
+    mountpoints = list(settings.extra_mount)
+    mountpoints.append(
+        MountBind(settings.versions, settings.versions.relative_to('/'), False)
+    )
+    for m in mountpoints:
+        m.mount(settings.root)
+    os.chroot(str(settings.root))
+    os.chdir('/')
+    exec_command(command)
+
+
+def main(args):
+    """Run the requested command inside a freshly unshared sandbox.
+
+    Builds ``Settings`` from ``args`` and hands ``sandbox_run`` to
+    ``pidns_run`` with new user, mount, network, IPC and PID namespaces.
+
+    :param args: Namespace produced by ``argument_parser``.
+    """
+    pidns_run(
+        CLONE_NEWUSER | CLONE_NEWNS | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWPID,
+        sandbox_run,
+        Settings.from_args(args),
+        args.command,
+    )
+
+
+# Command-line interface:
+#   userns_sandbox.py [options] ROOT_DIR ENVIRONMENT COMMAND [ARG...]
+# NOTE(review): options intended for COMMAND itself (e.g. ``sh -c ...``) are
+# presumably best separated with ``--`` so argparse does not try to parse
+# them; confirm against how pthbs-build invokes this script.
+argument_parser = argparse.ArgumentParser(
+    description="User namespaces based sandbox for pthbs"
+)
+# add_argument() takes ``help=``; ``description=`` is only valid on the parser.
+argument_parser.add_argument(
+    '--vars', '-y', type=pathlib.PosixPath,
+    help="vars.yaml to read configuration from"
+)
+argument_parser.add_argument(
+    '--versions', '-V', type=pathlib.PosixPath,
+    help="versions dir (e.g. /versions)"
+)
+argument_parser.add_argument(
+    '--extra-mount', action='append', type=parse_mount, default=[],
+    help="additional mount: tmpfs:PATH, ro_bind:SRC[:DST] or rw_bind:SRC[:DST]"
+)
+argument_parser.add_argument('root_dir', type=pathlib.PosixPath)
+argument_parser.add_argument('environment')
+argument_parser.add_argument('command', nargs='+')
+
+
+if __name__ == '__main__':
+    args = argument_parser.parse_args()
+    main(args)