#!/command/execlineb -s1

## usage: ns_run <root_directory> <executable> [<arg1> ...]
##
## Creates isolated namespace/container with given root and runs given
## executable in it.
##
## Environment variables used:
##   HOST - hostname to set
##   NS_ROOT - where to bind-mount the root directory (default: /mnt/chroot)
##   NS_EXTRA - extra execline script to run after setting up the namespaces
##              and mounting essential filesystems, but before entering the
##              container and unmounting the host filesystems
##   NS_FSTAB - file with extra mounts to perform after running the above
##              script
##   NS_TMPFS - place to store binaries in the container that are run
##              before dropping privs, relative path from new root
##              (default: mnt/ns)
##   NS_BIN - directory to get said binaries from
##            (default: /mnt/volumes/containers/bin); currently needs:
##            - `if` from execline
##            - busybox with `umount` and `chpst` applets
##            - anything you wish to call before and for dropping privs,
##              eg. s6-applyuidgid
##            All statically linked of course.
##   NS_NO_PID1 - if set to a non-empty value, allow not running as the init
##                process (PID 1) of the namespace.

# import variables from environment, with defaults
# multisubstitute performs all four substitutions in a single pass over the
# rest of the script, so every later occurrence of HOST, NS_ROOT, NS_TMPFS
# and NS_BIN as a variable reference (also inside blocks) is replaced here
# by the environment value or, when the variable is unset, by the -D default.
# The NS_TMPFS default is a relative path; it is used relative to the new
# root once we have changed directory into it.
multisubstitute {
	importas -D container HOST HOST
	importas -D /mnt/chroot NS_ROOT NS_ROOT
	importas -D mnt/ns NS_TMPFS  NS_TMPFS
	importas -D /mnt/volumes/containers/bin NS_BIN NS_BIN
}

# check we are PID1 (in a new PID namespace)
# getpid stores the PID of this process in the PID environment variable.
getpid PID
# Abort unless we are PID 1 or NS_NO_PID1 is set to a non-empty value.
ifelse {
	importas -D "" NS_NO_PID1 NS_NO_PID1
	importas -i PID PID
	# Two simple tests chained with `if` instead of one 6-argument
	# `test ... -a ...`: the -a operator is obsolescent and POSIX leaves
	# test with more than 4 arguments unspecified.
	# The condition holds only when NS_NO_PID1 is empty AND PID is not 1.
	if { test -z ${NS_NO_PID1} }
	test 1 -ne $PID
} {
	# report on stderr (stdout is redirected to fd 2) and exit with 111
	fdmove -c 1 2
	foreground { echo "ns_run: fatal: not PID 1" }
	exit 111
}
# PID was only needed for the check above; do not leak it further
unexport PID


unshare -m -u -i  # new mount, UTS and IPC namespaces
# The HOST reference below was already replaced by the multisubstitute at
# the top of the script (environment value, or the default when HOST is
# unset), so hostname is called directly.  An additional
# `importas -i HOST HOST` in front of it would exit 100 whenever HOST is
# unset, and the default hostname would then never be applied.
# foreground keeps this step best-effort: a failing hostname call does not
# abort the setup.
foreground { hostname $HOST }

# Generate the final script that we exec into at the very end to enter the
# container.  It has to be generated before we start mounting anything,
# so only the mountpoints that exist at this point will get unmounted and
# everything we mount below will stay.
# backtick stores the standard output of the block in the NS_FINAL_SCRIPT
# environment variable; -i makes it insist on the block succeeding.
backtick -i NS_FINAL_SCRIPT {
	# Generate execline script that performs pivot_root
	# and umounts all the filesystems not used by the container
	# formerly: /root/ns_execline.zsh $@
	# Every step is wrapped in `if`, so a failing step stops the generation.

	# 1. first line of the generated script: pivot_root into the current
	#    directory, parking the old root in the oldroot directory under
	#    NS_TMPFS
	if {
		printf "%s\n"
		"if { pivot_root . \${NS_TMPFS}/oldroot }"
	}
	# 2. output of the external helper ns_umount_script.awk (not part of
	#    this file), fed with the current mount table.
	#    NOTE(review): presumably it prints umount commands for the mounts
	#    listed in /proc/self/mountinfo, as seen below ROOT after
	#    pivot_root and using the binaries in BIN - confirm against the
	#    helper itself.
	if {
		ns_umount_script.awk
		-vROOT=/${NS_TMPFS}/oldroot
		-vBIN=/${NS_TMPFS}/bin
		/proc/self/mountinfo
	}
	# 3. a line running busybox chpst from the bin directory under NS_TMPFS
	#    with `-/ .` (per busybox chpst usage: change root to the given
	#    directory, here the current one)
	if { printf "%s\n" "./${NS_TMPFS}/bin/busybox chpst -/ ." }
	# quote all the arguments we got for interpretation by execline
	# 4. one s6-quote output per remaining command-line argument (the
	#    executable and its arguments); -o 0 stops the loop at the first
	#    s6-quote invocation that does not exit 0
	forx -o 0 X { $@ } importas -i X X s6-quote -- $X
}

# mount and enter the chroot directory
# $1 is the <root_directory> argument; --rbind is a recursive bind mount,
# so mounts below it come along.  `if` aborts the whole script on failure.
if { mount --rbind $1 $NS_ROOT }
# from here on all relative paths (proc, dev, run, ...) are relative to
# the new root
cd $NS_ROOT

## this will be better handled by NS_EXTRA script
## and perhaps /etc/resolv.conf -> /run/resolv.conf symlink
# foreground { cp /etc/resolv.conf etc/ }

# /proc
# mount a proc filesystem on the proc directory of the new root
if { mount -t proc proc proc }

# /dev
# A tmpfs is mounted on the dev directory of the new root, with the dev
# option so device nodes on it are usable, and mode 0755.  It is mounted
# read-write (-w) and only remounted read-only at the very end of the
# script.
if { s6-mount -nwt tmpfs -o nosuid,dev,mode=0755 dev dev }
# Populate it with a minimal set of character device nodes
# (mknod arguments: permissions, path, type c, major, minor).
# Any failing mknod aborts the setup.
if { mknod -m 666 dev/null c 1 3 }
if { mknod -m 666 dev/full c 1 7 }
if { mknod -m 666 dev/ptmx c 5 2 }
if { mknod -m 644 dev/random c 1 8 }
if { mknod -m 644 dev/urandom c 1 9 }
if { mknod -m 666 dev/zero c 1 5 }
if { mknod -m 666 dev/tty c 5 0 }

# shm, pts and mqueue are provided below
# also have some convenience dirs in place for optionally bind-mounting them
# -p is used because run/shm, unlike the dev/* entries, is not created on
# the fresh dev tmpfs but on the bind-mounted root, so it may be left over
# from an earlier run.  A plain mkdir would fail on the existing directory
# and the enclosing `if` would abort the whole setup.
if {
	mkdir -p

#	dev/shm
	run/shm

	dev/pts
	dev/mqueue

	dev/block
	dev/bus
	dev/char
	dev/dri
	dev/input
	dev/loop
	dev/net
	dev/snd
	dev/usb

}
# pseudo-terminal filesystem on dev/pts
if { mount -t devpts devpts dev/pts }
# dev/shm is not a tmpfs of its own (see the disabled line below); it is a
# symlink to run/shm, which is made world-writable with the sticky bit set.
#if { s6-mount -nwt tmpfs -o nosuid,nodev,mode=1777 shm dev/shm }
if { s6-ln -sf ../run/shm dev/shm }
if { chmod 1777 run/shm }
# POSIX message queue filesystem on dev/mqueue
if { s6-mount -nwt mqueue -o nosuid,nodev,noexec mqueue dev/mqueue }
# leave /dev read-write for now, so stuff can be added by scripts below

# mountpoint for privileged operations and pivot_root
# A private tmpfs (mode 700) is mounted on NS_TMPFS inside the new root.
# It holds the oldroot directory used as the pivot_root target and the bin
# directory with the helper binaries taken from NS_BIN.
if { s6-mount -nwt tmpfs -o nosuid,nodev,mode=700 mnt_ns $NS_TMPFS }
if { mkdir ${NS_TMPFS}/oldroot ${NS_TMPFS}/bin }
#if { s6-hiercopy $NS_BIN ${NS_TMPFS}/bin }
if { s6-mount -n -o bind,ro $NS_BIN ${NS_TMPFS}/bin }
# The kernel ignores the read-only flag when a bind mount is created; the
# new mount inherits the flags of the source mount.  A separate
# remount,bind is needed to actually make this mount point read-only
# (it changes only this mount point, not the underlying filesystem).
if { s6-mount -n -o remount,bind,ro $NS_BIN ${NS_TMPFS}/bin }

# container-specific setup provided in environment
# The inner `if -t` exits 0 when NS_EXTRA is not set, so the step is
# skipped without error.  Otherwise the value of NS_EXTRA is run as an
# execline script (with the new root as working directory), and a failure
# of that script aborts the setup through the outer `if`.
if {
	if -t { s6-test -v NS_EXTRA }
	importas -i NS_EXTRA NS_EXTRA
	execlineb -c $NS_EXTRA
}
# do not pass the hook script on in the environment
unexport NS_EXTRA

# Same pattern for NS_FSTAB: when set, mount everything listed in the
# given fstab file; a failing mount -a aborts the setup.
if {
	if -t { s6-test -v NS_FSTAB }
	importas -i NS_FSTAB NS_FSTAB
	mount -a --fstab $NS_FSTAB
}
unexport NS_FSTAB

# now we can make /dev immutable
# (remounts the dev tmpfs read-only; NS_EXTRA and NS_FSTAB above were the
# last chance to add entries to it)
if { mount -o remount,ro dev }

# exec into the script we generated above, it:
# * does pivot_root to change rootdir
# * umounts all undesired filesystems
# * execs into $@
# importas -u substitutes the script text and at the same time removes
# NS_FINAL_SCRIPT from the environment, so it is not passed on to the
# container; -i insists on the variable being set.
importas -i -u NS_FINAL_SCRIPT NS_FINAL_SCRIPT
execlineb -c $NS_FINAL_SCRIPT