kernel_optimize_test/fs
Eric W. Biederman 015bb305b8 Merge branch 'nsfs-discovery'
Michael Kerrisk <<mtk.manpages@gmail.com> writes:

I would like to write code that discovers the namespace setup on a live
system.  The NS_GET_PARENT and NS_GET_USERNS ioctl() operations added in
Linux 4.9 provide much of what I want, but there are still a couple of
small pieces missing. Those pieces are added with this patch series.

Here's an example program that makes use of the new ioctl() operations.

8x---8x---8x---8x---8x---8x---8x---8x---8x---8x---8x---8x---8x---8x---
/* ns_capable.c

   (C) 2016 Michael Kerrisk, <mtk.manpages@gmail.com>

   Licensed under the GNU General Public License v2 or later.

   Test whether a process (identified by PID) might (subject to LSM checks)
   have capabilities in a namespace (identified by a /proc/PID/ns/xxx file).
*/

			} while (0)

     			     exit(EXIT_FAILURE); } while (0)

/* Display capabilities sets of process with specified PID */

static void
show_cap(pid_t pid)
{
    cap_t caps;
    char *cap_string;

    caps = cap_get_pid(pid);
    if (caps == NULL)
	errExit("cap_get_proc");

    cap_string = cap_to_text(caps, NULL);
    if (cap_string == NULL)
	errExit("cap_to_text");

    printf("Capabilities: %s\n", cap_string);
}

/* Obtain the effective UID pf the process 'pid' by
   scanning its /proc/PID/file */

static uid_t
get_euid_of_process(pid_t pid)
{
    char path[PATH_MAX];
    char line[1024];
    int uid;

    snprintf(path, sizeof(path), "/proc/%ld/status", (long) pid);

    FILE *fp;
    fp = fopen(path, "r");
    if (fp == NULL)
	errExit("fopen-/proc/PID/status");

    for (;;) {
	if (fgets(line, sizeof(line), fp) == NULL) {

	    /* Should never happen... */

	    fprintf(stderr, "Failure scanning %s\n", path);
	    exit(EXIT_FAILURE);
	}

	if (strstr(line, "Uid:") == line) {
	    sscanf(line, "Uid: %*d %d %*d %*d", &uid);
	    return uid;
	}
    }
}

int
main(int argc, char *argv[])
{
    int ns_fd, userns_fd, pid_userns_fd;
    int nstype;
    int next_fd;
    struct stat pid_stat;
    struct stat target_stat;
    char *pid_str;
    pid_t pid;
    char path[PATH_MAX];

    if (argc < 2) {
	fprintf(stderr, "Usage: %s PID [ns-file]\n", argv[0]);
	fprintf(stderr, "\t'ns-file' is a /proc/PID/ns/xxxx file; "
		        "if omitted, use the namespace\n"
			"\treferred to by standard input "
			"(file descriptor 0)\n");
	exit(EXIT_FAILURE);
    }

    pid_str = argv[1];
    pid = atoi(pid_str);

    if (argc <= 2) {
	ns_fd = STDIN_FILENO;
    } else {
        ns_fd = open(argv[2], O_RDONLY);
        if (ns_fd == -1)
	    errExit("open-ns-file");
    }

    /* Get the relevant user namespace FD, which is 'ns_fd' if 'ns_fd' refers
       to a user namespace, otherwise the user namespace that owns 'ns_fd' */

    nstype = ioctl(ns_fd, NS_GET_NSTYPE);
    if (nstype == -1)
	errExit("ioctl-NS_GET_NSTYPE");

    if (nstype == CLONE_NEWUSER) {
	userns_fd = ns_fd;
    } else {
	userns_fd = ioctl(ns_fd, NS_GET_USERNS);
        if (userns_fd == -1)
	    errExit("ioctl-NS_GET_USERNS");
    }

    /* Obtain 'stat' info for the user namespace of the specified PID */

    snprintf(path, sizeof(path), "/proc/%s/ns/user", pid_str);

    pid_userns_fd = open(path, O_RDONLY);
    if (pid_userns_fd == -1)
	errExit("open-PID");

    if (fstat(pid_userns_fd, &pid_stat) == -1)
	errExit("fstat-PID");

    /* Get 'stat' info for the target user namesapce */

    if (fstat(userns_fd, &target_stat) == -1)
	errExit("fstat-PID");

    /* If the PID is in the target user namespace, then it has
       whatever capabilities are in its sets. */

    if (pid_stat.st_dev == target_stat.st_dev &&
		pid_stat.st_ino == target_stat.st_ino) {
        printf("PID is in target namespace\n");
	printf("Subject to LSM checks, it has the following capabilities\n");

	show_cap(pid);

	exit(EXIT_SUCCESS);
    }

    /* Otherwise, we need to walk through the ancestors of the target
       user namespace to see if PID is in an ancestor namespace */

    for (;;) {
	int f;

	next_fd = ioctl(userns_fd, NS_GET_PARENT);

	if (next_fd == -1) {

	    /* The error here should be EPERM... */

	    if (errno != EPERM)
	        errExit("ioctl-NS_GET_PARENT");

	    printf("PID is not in an ancestor namespace\n");
	    printf("It has no capabilities in the target namespace\n");

	    exit(EXIT_SUCCESS);
	}

        if (fstat(next_fd, &target_stat) == -1)
	    errExit("fstat-PID");

	/* If the 'stat' info for this user namespace matches the 'stat'
	 * info for 'next_fd', then the PID is in an ancestor namespace */

        if (pid_stat.st_dev == target_stat.st_dev &&
		    pid_stat.st_ino == target_stat.st_ino)
	    break;

	/* Next time round, get the next parent */

	f = userns_fd;
	userns_fd = next_fd;
	close(f);
    }

    /* At this point, we found that PID is in an ancestor of the target
       user namespace, and 'userns_fd' refers to the immediate descendant
       user namespace of PID in the chain of user namespaces from PID to
       the target user namespace. If the effective UID of PID matches the
       owner UID of descendant user namespace, then PID has all
       capabilities in the descendant namespace(s); otherwise, it just has
       the capabilities that are in its sets. */

    uid_t owner_uid, uid;
    if (ioctl(userns_fd, NS_GET_OWNER_UID, &owner_uid) == -1) {
	perror("ioctl-NS_GET_OWNER_UID");
	exit(EXIT_FAILURE);
    }

    uid = get_euid_of_process(pid);

    printf("PID is in an ancestor namespace\n");
    if (owner_uid == uid) {
	printf("And its effective UID matches the owner "
		"of the namespace\n");
	printf("Subject to LSM checks, PID has all capabilities in "
		"that namespace!\n");
    } else {
	printf("But its effective UID does not match the owner "
		"of the namespace\n");
	printf("Subject to LSM checks, it has the following capabilities\n");
	show_cap(pid);
    }

    exit(EXIT_SUCCESS);
}
8x---8x---8x---8x---8x---8x---8x---8x---8x---8x---8x---8x---8x---8x---

Michael Kerrisk (2):
  nsfs: Add an ioctl() to return the namespace type
  nsfs: Add an ioctl() to return owner UID of a userns

 fs/nsfs.c                 | 13 +++++++++++++
 include/uapi/linux/nsfs.h |  9 +++++++--
 2 files changed, 20 insertions(+), 2 deletions(-)
2017-02-03 14:53:27 +13:00
..
9p
adfs
affs
afs fs: Better permission checking for submounts 2017-02-02 04:36:12 +13:00
autofs4 fs: Better permission checking for submounts 2017-02-02 04:36:12 +13:00
befs
bfs
btrfs
cachefiles
ceph
cifs fs: Better permission checking for submounts 2017-02-02 04:36:12 +13:00
coda
configfs
cramfs
crypto
debugfs fs: Better permission checking for submounts 2017-02-02 04:36:12 +13:00
devpts
dlm ktime: Get rid of ktime_equal() 2016-12-25 17:21:23 +01:00
ecryptfs
efivarfs
efs
exofs
exportfs
ext2 ext2: Return BH_New buffers for zeroed blocks 2016-12-26 20:29:24 -08:00
ext4 ext4: Simplify DAX fault path 2016-12-26 20:29:25 -08:00
f2fs
fat
freevxfs
fscache
fuse
gfs2
hfs
hfsplus
hostfs
hpfs
hugetlbfs
isofs
jbd2
jffs2
jfs
kernfs
lockd
minix
ncpfs
nfs fs: Better permission checking for submounts 2017-02-02 04:36:12 +13:00
nfs_common
nfsd
nilfs2
nls
notify inotify: Convert to using per-namespace limits 2017-01-24 12:03:07 +13:00
ntfs
ocfs2
omfs
openpromfs
orangefs
overlayfs
proc proc: Better ownership of files for non-dumpable tasks in user namespaces 2017-01-24 12:03:09 +13:00
pstore
qnx4
qnx6
quota
ramfs
reiserfs
romfs
squashfs
sysfs
sysv
tracefs
ubifs
udf
ufs
xfs
aio.c
anon_inodes.c
attr.c
bad_inode.c
binfmt_aout.c
binfmt_elf_fdpic.c
binfmt_elf.c
binfmt_em86.c
binfmt_flat.c
binfmt_misc.c
binfmt_script.c
block_dev.c
buffer.c
char_dev.c
compat_binfmt_elf.c
compat_ioctl.c
compat.c
coredump.c
dax.c dax: Call ->iomap_begin without entry lock during dax fault 2016-12-26 20:29:25 -08:00
dcache.c mnt: Protect the mountpoint hashtable with mount_lock 2017-01-10 13:34:43 +13:00
dcookies.c
direct-io.c
drop_caches.c
eventfd.c
eventpoll.c
exec.c exec: Remove LSM_UNSAFE_PTRACE_CAP 2017-01-24 12:03:08 +13:00
fcntl.c
fhandle.c
file_table.c
file.c
filesystems.c
fs_pin.c
fs_struct.c
fs-writeback.c
inode.c
internal.h
ioctl.c
iomap.c
Kconfig
Kconfig.binfmt
libfs.c libfs: Modify mount_pseudo_xattr to be clear it is not a userspace mount 2017-01-10 13:34:55 +13:00
locks.c
Makefile
mbcache.c
mount.h
mpage.c
namei.c fs: Better permission checking for submounts 2017-02-02 04:36:12 +13:00
namespace.c fs: Better permission checking for submounts 2017-02-02 04:36:12 +13:00
no-block.c
nsfs.c nsfs: Add an ioctl() to return owner UID of a userns 2017-02-03 14:35:43 +13:00
open.c
pipe.c
pnode.c
pnode.h
posix_acl.c
proc_namespace.c
read_write.c
readdir.c
select.c
seq_file.c
signalfd.c
splice.c
stack.c
stat.c
statfs.c
super.c fs: Better permission checking for submounts 2017-02-02 04:36:12 +13:00
sync.c
timerfd.c
userfaultfd.c
utimes.c
xattr.c