lnstools

Linux namespace tools
git clone https://ccx.te2000.cz/git/lnstools
Log | Files | Refs | README

lns-lockdown_main.c (10336B)


      1 #include <unistd.h>
      2 #include <grp.h>
      3 #include <limits.h>
      4 #include <stdlib.h>
      5 #include <errno.h>
      6 #include <sys/prctl.h>
      7 #include <sys/capability.h>
      8 #include <sys/mman.h>  /* for mmap() */
      9 #include <sys/mount.h>
     10 #include <sys/syscall.h>  /* for SYS_pivot_root */
     11 #include <linux/filter.h>  /* for seccomp */
     12 #include <linux/seccomp.h>  /* for seccomp */
     13 #include <linux/securebits.h>  /* for prctl(PR_SET_SECUREBITS, ...) */
     14 
     15 #include <skalibs/types.h>
     16 #include <skalibs/setgroups.h>
     17 #include <skalibs/strerr.h>
     18 #include <skalibs/sgetopt.h>
     19 #include <skalibs/stralloc.h>
     20 #include <skalibs/djbunix.h>
     21 #include <skalibs/exec.h>
     22 
/* Command-line synopsis shown by dieusage(). */
#define USAGE "lns-lockdown [ -U iab_caps ] [ -S seccomp_bpf_fd ] [ -C chdir ] old-place-for-new-root new-place-for-old-root prog..."
/* Report USAGE through strerr_dieusage() with exit code 100. */
#define dieusage() strerr_dieusage(100, USAGE)
     25 
     26 /*** data structures ***/
/*
 * Identity and capability settings applied by drop().
 * Filled in by main() when the -U option is given.
 */
struct drop_privs_s {
    uid_t uid;                    /* target user id; 0 makes drop() skip setuid() */
    gid_t gid;                    /* target group id; 0 makes drop() skip setgid() */
    size_t gidn;                  /* number of entries used in gids; (size_t) -1 makes drop() leave the group list alone */
    gid_t gids[NGROUPS_MAX + 1];  /* supplementary group ids, parsed from $GIDLIST */
    cap_iab_t new_iab;            /* capability IAB tuple parsed from the -U argument */
};
     34 
/*
 * List of environment variable names handed to xmexec_n() by main().
 * NOTE(review): presumably these are the variables to remove from the
 * environment of the executed program -- confirm against skalibs exec docs.
 */
struct unexport_s {
    size_t count;  /* number of names stored in sa */
    stralloc sa;   /* the names, stored back to back */
};
     39 
     40 /*** syscall wrappers ***/
     41 int syscall_pivot_root(char const *new_root, char const *put_old) {
     42     return syscall(SYS_pivot_root, new_root, put_old);
     43 }
     44 int syscall_seccomp_set_filter(unsigned int flags, void *args) {
     45     return syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, flags, args);
     46 }
     47 
     48 /*** utility functions ***/
     49 char const *const getenv_dienotset(char const *const name) {
     50     char const *x = getenv(name);
     51     if (!x) {
     52         strerr_dienotset(100, name);
     53     }
     54     return x;
     55 }
     56 
     57 int stralloc_catenv0(stralloc *sa, char const * env_name) {
     58     char const *const value = getenv_dienotset(env_name);
     59     return stralloc_catb(sa, value, strlen(value) + 1);
     60 }
     61 
     62 /*** individual steps of dropping into sandbox ***/
     63 void pivot_and_umount(
     64     char const *const new_root,
     65     char const *const put_old,
     66     struct unexport_s *const unex
     67 ) {
     68     size_t newroot_len;
     69     for(newroot_len=0; new_root[newroot_len]; newroot_len++) {
     70         if(new_root[newroot_len] != put_old[newroot_len]) {
     71             strerr_dief1x(100, "first path is not prefix of second path");
     72         }
     73         if(newroot_len > PATH_MAX) {
     74             strerr_dief1x(111, "realpath() returned invalid string");
     75         }
     76     }
     77     if(newroot_len == 0) {
     78         strerr_dief1x(111, "got empty path?");
     79     }
     80     size_t mtp_count, mtp_n;
     81     size_t oldroot_len = strlen(put_old) - newroot_len;
     82     stralloc sa_env = STRALLOC_ZERO;
     83     stralloc sa_mtp = STRALLOC_ZERO;
     84     if (!stralloc_catb(&sa_mtp, &put_old[newroot_len], oldroot_len)) {
     85         strerr_diefu1sys(111, "store mount path");
     86     }
     87     if (!size_scan(getenv_dienotset("NS_MTP_COUNT"), &mtp_count)) {
     88         strerr_dieinvalid(100, "NS_MTP_COUNT");
     89     }
     90     if (!stralloc_catb(&unex->sa, "NS_MTP_COUNT", 13)) {
     91         strerr_diefu1sys(111, "store env name");
     92     }
     93     unex->count++;
     94 
     95     /* pivot_root */
     96     if (chdir(new_root) < 0) {
     97         strerr_diefu2sys(111, "chdir to ", new_root) ;
     98     }
     99     if (syscall_pivot_root(".", put_old) < 0) {
    100         strerr_diefu1sys(111, "pivot_root") ;
    101     }
    102     if (chroot(".") < 0) {
    103         strerr_diefu1sys(111, "chroot") ;
    104     }
    105 
    106     /* umount oldroot */
    107     char env_name[SIZE_FMT + 8] = "NS_MTP_";
    108     size_t env_name_len;
    109     for (mtp_n = 1; mtp_n <= mtp_count; mtp_n++) {
    110         env_name_len = 7 + size_fmt(&env_name[7], mtp_n);
    111         env_name[env_name_len] = 0;
    112         sa_mtp.len = oldroot_len;
    113         if (!stralloc_catb(&unex->sa, env_name, env_name_len)) {
    114             strerr_diefu1sys(111, "store env name");
    115         }
    116         unex->count++;
    117 
    118         sa_mtp.len = oldroot_len;
    119         if (!stralloc_catenv0(&sa_mtp, env_name)) {
    120             strerr_diefu1sys(111, "store mount path");
    121         }
    122         if (sa_mtp.s[oldroot_len] != '/') {
    123             strerr_dieinvalid(100, env_name);
    124         }
    125         switch (umount(sa_mtp.s) ? errno : 0) {
    126             case 0:
    127                 break;
    128             case EBUSY:
    129                 if (umount2(sa_mtp.s, MNT_DETACH)) {
    130                     strerr_diefu2sys(111, "to detach mountpoint: ", sa_mtp.s) ;
    131                 }
    132                 break;
    133             default:
    134                 strerr_diefu2sys(111, "to umount: ", sa_mtp.s) ;
    135                 break;
    136         }
    137     }
    138 }
    139 
    140 void drop(struct drop_privs_s const *const privs) {
    141     /* make sure privileges are dropped permanently */
    142     if (prctl(PR_SET_SECUREBITS,
    143               SECBIT_KEEP_CAPS | /* technically unneeded as NO_SETUID_FIXUP is superset */
    144               SECBIT_NO_SETUID_FIXUP |
    145               SECBIT_NOROOT | /* disables suid and filecap privilege gain */
    146               SECBIT_NOROOT_LOCKED) < 0) {
    147         strerr_dief1sys(111, "Failed to set securebits via prctl()");
    148     }
    149 
    150     /* set these capabilities for the current process */
    151     if (cap_iab_set_proc(privs->new_iab) != 0) {
    152         strerr_dief1sys(111, "Failed to set capabilities via cap_set_proc()");
    153     }
    154 
    155     /* ancillary groups */
    156     if (privs->gidn != (size_t) -1
    157             && setgroups_and_gid(privs->gid ? privs->gid : getegid(), privs->gidn,
    158                                  privs->gids) < 0) {
    159         strerr_diefu1sys(111, "set supplementary group list");
    160     }
    161 
    162     /* primary group */
    163     if (privs->gid && setgid(privs->gid) < 0) {
    164         strerr_diefu1sys(111, "setgid");
    165     }
    166 
    167     /* set userid */
    168     if (privs->uid && setuid(privs->uid) < 0) {
    169         strerr_diefu1sys(111, "setuid");
    170     }
    171 }
    172 
    173 void load_seccomp_program(int fd) {
    174     strerr_dief(99, "seccomp program loading not yet implemented");
    175     struct sock_fprog seccomp_filter;
    176     struct stat st;
    177     if (fstat (fd, &st) < 0) {
    178         strerr_diefu1sys(111, "fstat() the seccomp program fd");
    179     }
    180     if (st.st_size > 0) {
    181         void * addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
    182         if (addr == MAP_FAILED) {
    183             strerr_diefu1sys(111, "mmap() the seccomp program fd");
    184         }
    185         seccomp_filter.len = st.st_size / 8;
    186         seccomp_filter.filter = (struct sock_filter *) addr;
    187     } else {
    188         stralloc sa = STRALLOC_ZERO;
    189         char read_buf[4096];
    190         ssize_t read_bytes;
    191         do {
    192             read_bytes = read (fd, read_buf, sizeof(read_buf));
    193             if (read_bytes < 0 && errno != EINTR) {
    194                 strerr_diefu1sys(111, "read() the seccomp program fd");
    195             } else if (read_bytes > 0) {
    196                 if (!stralloc_catb(&sa, read_buf, read_bytes)) {
    197                     strerr_diefu1sys(111, "store seccomp program");
    198                 }
    199             }
    200         } while (read_bytes != 0);
    201         seccomp_filter.len = sa.len / 8;
    202         seccomp_filter.filter = (struct sock_filter *) sa.s;
    203     }
    204     if (syscall_seccomp_set_filter(0, &seccomp_filter)) {
    205         strerr_diefu1sys(111, "apply the seccomp BPF program");
    206     }
    207 }
    208 
    209 int main (int argc, char const *const *argv) {
    210     struct unexport_s unex = { 0, STRALLOC_ZERO };
    211     int drop_privs = 0;
    212     struct drop_privs_s privs = {
    213         .uid = 0,
    214         .gid = 0,
    215         .gidn = (size_t) -1,
    216     };
    217     char const * chdir_to = 0;
    218     int seccomp_fd = -1;
    219 
    220     PROG = "ns_pivot_into";
    221     {
    222         subgetopt l = SUBGETOPT_ZERO;
    223         for (;;) {
    224             int opt = subgetopt_r(argc, argv, "U:S:C:", &l);
    225             if (opt == -1) {
    226                 break;
    227             }
    228             switch (opt) {
    229                 case 'C' : /* chdir-to */
    230                     chdir_to = l.arg;
    231                     break;
    232                 case 'S' : /* FD with seccomp BPF program */
    233                     if (!int_scan(l.arg, &seccomp_fd)) {
    234                         strerr_dieinvalid(100, "seccomp BPF program file descriptor");
    235                     }
    236                     if (ndelay_off(seccomp_fd)) {
    237                         strerr_diefu1sys(111, "make seccomp program fd non-blocking");
    238                     }
    239                     if (coe(seccomp_fd)) {
    240                         strerr_diefu1sys(111, "make seccomp program fd close-on-exec");
    241                     }
    242                     break;
    243                 case 'U' :  /* setuid from env */
    244                     drop_privs = 1;
    245                     /*
    246                      The IAB 3-tuple of capability vectors (Inh, Amb and Bound),
    247                      captured in type cap_iab_t combine to pass capabilities
    248                      from one process to another through execve(2) system calls.
    249                      */
    250                     privs.new_iab = cap_iab_from_text(l.arg);
    251                     if (privs.new_iab == NULL) {
    252                         strerr_dieinvalid(100, "capability set");
    253                     }
    254                     if (!uid0_scan(getenv_dienotset("UID"), &privs.uid)) {
    255                         strerr_dieinvalid(100, "UID");
    256                     }
    257                     if (!gid0_scan(getenv_dienotset("GID"), &privs.gid)) {
    258                         strerr_dieinvalid(100, "GID");
    259                     }
    260                     char const *x = getenv_dienotset("GIDLIST");
    261                     if (!gid_scanlist(privs.gids, NGROUPS_MAX + 1, x, &privs.gidn) && *x) {
    262                         strerr_dieinvalid(100, "GIDLIST");
    263                     }
    264                     if (!stralloc_catb(&unex.sa, "UID\0GID\0GIDLIST", 16)) {
    265                         strerr_diefu1sys(111, "store env names");
    266                     }
    267                     unex.count = 3;
    268                     break ;
    269                 default :
    270                     dieusage();
    271             }
    272         }
    273         argc -= l.ind ;
    274         argv += l.ind ;
    275     }
    276     if (argc < 3) {
    277         dieusage();
    278     }
    279     char *new_root = realpath(argv[0], NULL);
    280     if (new_root == NULL) {
    281         strerr_diefu1sys(111, "to resolve first path");
    282     }
    283     char *put_old = realpath(argv[1], NULL);
    284     if (new_root == NULL) {
    285         strerr_diefu1sys(111, "to resolve first path");
    286     }
    287     pivot_and_umount(new_root, put_old, &unex);
    288     free(new_root);
    289     free(put_old);
    290     if (drop_privs) {
    291         drop(&privs);
    292     } else if (seccomp_fd >= 0) {
    293         if (prctl (PR_SET_NO_NEW_PRIVS, 1L, 0L, 0L, 0L)) {
    294             strerr_diefu1sys(111, "enable NO_NEW_PRIVS for seccomp");
    295         }
    296     }
    297     if (seccomp_fd >= 0) {
    298         load_seccomp_program(seccomp_fd);
    299     }
    300     if (chdir_to) {
    301         if (chdir(chdir_to) < 0) {
    302             strerr_diefu2sys(111, "chdir to ", chdir_to);
    303         }
    304     }
    305     xmexec_n(&argv[2], unex.sa.s, unex.sa.len, unex.count);
    306 }