vfio-mdev: Fix remove race

Using the mtty mdev sample driver we can generate a remove race by
starting one shell that continuously creates mtty devices and several
other shells (four in my case) that all attempt to remove devices.
The fault occurs in mdev_remove_sysfs_files() where the
passed type arg is NULL, which suggests we've received a struct device
in mdev_device_remove() but it's in some sort of teardown state.  The
solution here is to make use of the accidentally unused list_head on
the mdev_device such that the mdev core keeps a list of all the mdev
devices.  This allows us to validate that we have a valid mdev before
we start removal, remove it from the list to prevent others from
working on it, and if the vendor driver refuses to remove, we can
re-add it to the list.

Cc: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
This commit is contained in:
Alex Williamson 2016-12-30 08:13:33 -07:00
parent 6c38c055cc
commit 49550787a9

View File

@ -27,6 +27,9 @@ static LIST_HEAD(parent_list);
static DEFINE_MUTEX(parent_list_lock); static DEFINE_MUTEX(parent_list_lock);
static struct class_compat *mdev_bus_compat_class; static struct class_compat *mdev_bus_compat_class;
static LIST_HEAD(mdev_list);
static DEFINE_MUTEX(mdev_list_lock);
static int _find_mdev_device(struct device *dev, void *data) static int _find_mdev_device(struct device *dev, void *data)
{ {
struct mdev_device *mdev; struct mdev_device *mdev;
@ -316,6 +319,11 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
dev_dbg(&mdev->dev, "MDEV: created\n"); dev_dbg(&mdev->dev, "MDEV: created\n");
mutex_unlock(&parent->lock); mutex_unlock(&parent->lock);
mutex_lock(&mdev_list_lock);
list_add(&mdev->next, &mdev_list);
mutex_unlock(&mdev_list_lock);
return ret; return ret;
create_failed: create_failed:
@ -329,12 +337,30 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
int mdev_device_remove(struct device *dev, bool force_remove) int mdev_device_remove(struct device *dev, bool force_remove)
{ {
struct mdev_device *mdev; struct mdev_device *mdev, *tmp;
struct parent_device *parent; struct parent_device *parent;
struct mdev_type *type; struct mdev_type *type;
int ret; int ret;
bool found = false;
mdev = to_mdev_device(dev); mdev = to_mdev_device(dev);
mutex_lock(&mdev_list_lock);
list_for_each_entry(tmp, &mdev_list, next) {
if (tmp == mdev) {
found = true;
break;
}
}
if (found)
list_del(&mdev->next);
mutex_unlock(&mdev_list_lock);
if (!found)
return -ENODEV;
type = to_mdev_type(mdev->type_kobj); type = to_mdev_type(mdev->type_kobj);
parent = mdev->parent; parent = mdev->parent;
mutex_lock(&parent->lock); mutex_lock(&parent->lock);
@ -342,6 +368,11 @@ int mdev_device_remove(struct device *dev, bool force_remove)
ret = mdev_device_remove_ops(mdev, force_remove); ret = mdev_device_remove_ops(mdev, force_remove);
if (ret) { if (ret) {
mutex_unlock(&parent->lock); mutex_unlock(&parent->lock);
mutex_lock(&mdev_list_lock);
list_add(&mdev->next, &mdev_list);
mutex_unlock(&mdev_list_lock);
return ret; return ret;
} }
@ -349,7 +380,8 @@ int mdev_device_remove(struct device *dev, bool force_remove)
device_unregister(dev); device_unregister(dev);
mutex_unlock(&parent->lock); mutex_unlock(&parent->lock);
mdev_put_parent(parent); mdev_put_parent(parent);
return ret;
return 0;
} }
static int __init mdev_init(void) static int __init mdev_init(void)