From 7bd8830875bfa380c68f390efbad893293749324 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 15 Jul 2016 06:35:24 -0500 Subject: [PATCH 1/3] cgroupns: Fix the locking in copy_cgroup_ns If "clone(CLONE_NEWCGROUP...)" is called it results in a nice lockdep valid splat. In __cgroup_proc_write the lock ordering is: cgroup_mutex -- through cgroup_kn_lock_live cgroup_threadgroup_rwsem In copy_process the guts of clone the lock ordering is: cgroup_threadgroup_rwsem -- through threadgroup_change_begin cgroup_mutex -- through copy_namespaces -- copy_cgroup_ns lockdep reports some a different call chains for the first ordering of cgroup_mutex and cgroup_threadgroup_rwsem but it is harder to trace. This is most definitely deadlock potential under the right circumstances. Fix this by by skipping the cgroup_mutex and making the locking in copy_cgroup_ns mirror the locking in cgroup_post_fork which also runs during fork under the cgroup_threadgroup_rwsem. Cc: stable@vger.kernel.org Fixes: a79a908fd2b0 ("cgroup: introduce cgroup namespaces") Signed-off-by: "Eric W. Biederman" Signed-off-by: Tejun Heo --- kernel/cgroup.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 75c0ff00aca6..5f01e00cffc4 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -6309,14 +6309,11 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, if (!ns_capable(user_ns, CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); - mutex_lock(&cgroup_mutex); + /* It is not safe to take cgroup_mutex here */ spin_lock_irq(&css_set_lock); - cset = task_css_set(current); get_css_set(cset); - spin_unlock_irq(&css_set_lock); - mutex_unlock(&cgroup_mutex); new_ns = alloc_cgroup_ns(); if (IS_ERR(new_ns)) { From eedd0f4cbf5f3b81e82649832091e1d9d53f0709 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 15 Jul 2016 06:35:51 -0500 Subject: [PATCH 2/3] cgroupns: Close race between cgroup_post_fork and copy_cgroup_ns In most code paths involving cgroup migration cgroup_threadgroup_rwsem is taken. There are two exceptions: - remove_tasks_in_empty_cpuset calls cgroup_transfer_tasks - vhost_attach_cgroups_work calls cgroup_attach_task_all With cgroup_threadgroup_rwsem held it is guaranteed that cgroup_post_fork and copy_cgroup_ns will reference the same css_set from the process calling fork. Without such an interlock there process after fork could reference one css_set from it's new cgroup namespace and another css_set from task->cgroups, which semantically is nonsensical. Cc: stable@vger.kernel.org Fixes: a79a908fd2b0 ("cgroup: introduce cgroup namespaces") Signed-off-by: "Eric W. Biederman" Signed-off-by: Tejun Heo --- kernel/cgroup.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 5f01e00cffc4..e75efa819911 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2962,6 +2962,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) int retval = 0; mutex_lock(&cgroup_mutex); + percpu_down_write(&cgroup_threadgroup_rwsem); for_each_root(root) { struct cgroup *from_cgrp; @@ -2976,6 +2977,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) if (retval) break; } + percpu_up_write(&cgroup_threadgroup_rwsem); mutex_unlock(&cgroup_mutex); return retval; @@ -4343,6 +4345,8 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) mutex_lock(&cgroup_mutex); + percpu_down_write(&cgroup_threadgroup_rwsem); + /* all tasks in @from are being moved, all csets are source */ spin_lock_irq(&css_set_lock); list_for_each_entry(link, &from->cset_links, cset_link) @@ -4371,6 +4375,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) } while (task && !ret); out_err: cgroup_migrate_finish(&preloaded_csets); + percpu_up_write(&cgroup_threadgroup_rwsem); mutex_unlock(&cgroup_mutex); return ret; } From 726a4994b05ff5b6f83d64b5b43c3251217366ce Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 15 Jul 2016 06:36:44 -0500 Subject: [PATCH 3/3] cgroupns: Only allow creation of hierarchies in the initial cgroup namespace Unprivileged users can't use hierarchies if they create them as they do not have privilieges to the root directory. Which means the only thing a hiearchy created by an unprivileged user is good for is expanding the number of cgroup links in every css_set, which is a DOS attack. We could allow hierarchies to be created in namespaces in the initial user namespace. Unfortunately there is only a single namespace for the names of heirarchies, so that is likely to create more confusion than not. So do the simple thing and restrict hiearchy creation to the initial cgroup namespace. Cc: stable@vger.kernel.org Fixes: a79a908fd2b0 ("cgroup: introduce cgroup namespaces") Signed-off-by: "Eric W. Biederman" Signed-off-by: Tejun Heo --- kernel/cgroup.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e75efa819911..e0be49fc382f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2215,12 +2215,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, goto out_unlock; } - /* - * We know this subsystem has not yet been bound. Users in a non-init - * user namespace may only mount hierarchies with no bound subsystems, - * i.e. 'none,name=user1' - */ - if (!opts.none && !capable(CAP_SYS_ADMIN)) { + /* Hierarchies may only be created in the initial cgroup namespace. */ + if (ns != &init_cgroup_ns) { ret = -EPERM; goto out_unlock; }