btrfs: enumerate the type of exclusive operation in progress

Instead of using a flag bit for exclusive operation, use a variable to
store which exclusive operation is being performed.  Introduce an API
to start and finish an exclusive operation.

This would enable another way for tools to check which operation is
running or why starting an exclusive operation failed. The followup
patch adds a sysfs_notify() to alert userspace when the state changes, so
userspace can perform select() on it to get notified of the change.

This would enable us to enqueue a command which will wait for current
exclusive operation to complete before issuing the next exclusive
operation. This has been done synchronously as opposed to a background
process, or else error collection (if any) will become difficult.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ update comments ]
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Goldwyn Rodrigues 2020-08-25 10:02:32 -05:00 committed by David Sterba
parent ca10845a56
commit c3e1f96c37
5 changed files with 65 additions and 41 deletions

View File

@ -541,11 +541,6 @@ enum {
BTRFS_FS_QUOTA_OVERRIDE,
/* Used to record internally whether fs has been frozen */
BTRFS_FS_FROZEN,
/*
* Indicate that a whole-filesystem exclusive operation is running
* (device replace, resize, device add/delete, balance)
*/
BTRFS_FS_EXCL_OP,
/*
* Indicate that balance has been set up from the ioctl and is in the
* main phase. The fs_info::balance_ctl is initialized.
@ -566,6 +561,19 @@ enum {
BTRFS_FS_DISCARD_RUNNING,
};
/*
* Exclusive operations (device replace, resize, device add/remove, balance)
*/
/*
 * Identifies which whole-filesystem exclusive operation currently owns
 * fs_info->exclusive_operation. Exactly one value is stored at a time;
 * btrfs_exclop_start() installs one of the non-NONE values and
 * btrfs_exclop_finish() resets the field back to BTRFS_EXCLOP_NONE.
 */
enum btrfs_exclusive_operation {
/* No exclusive operation is running; the state btrfs_exclop_finish() restores */
BTRFS_EXCLOP_NONE,
/* Balance, claimed by btrfs_ioctl_balance() and btrfs_recover_balance() */
BTRFS_EXCLOP_BALANCE,
/* Device add, claimed by btrfs_ioctl_add_dev() */
BTRFS_EXCLOP_DEV_ADD,
/* Device removal, claimed by btrfs_ioctl_rm_dev() and btrfs_ioctl_rm_dev_v2() */
BTRFS_EXCLOP_DEV_REMOVE,
/* Device replace, claimed by the dev-replace ioctl and the async resume path */
BTRFS_EXCLOP_DEV_REPLACE,
/* Filesystem resize, claimed by btrfs_ioctl_resize() */
BTRFS_EXCLOP_RESIZE,
/* Swap file activation, claimed by btrfs_swap_activate() while mapping extents */
BTRFS_EXCLOP_SWAP_ACTIVATE,
};
struct btrfs_fs_info {
u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
unsigned long flags;
@ -937,6 +945,9 @@ struct btrfs_fs_info {
*/
int send_in_progress;
/* Type of exclusive operation running */
unsigned long exclusive_operation;
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
spinlock_t ref_verify_lock;
struct rb_root block_tree;
@ -3032,6 +3043,9 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
struct btrfs_ioctl_space_info *space);
void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_balance_args *bargs);
bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
enum btrfs_exclusive_operation type);
void btrfs_exclop_finish(struct btrfs_fs_info *fs_info);
/* file.c */
int __init btrfs_auto_defrag_init(void);

View File

@ -1025,7 +1025,7 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
* should never allow both to start and pause. We don't want to allow
* dev-replace to start anyway.
*/
if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_REPLACE)) {
down_write(&dev_replace->rwsem);
dev_replace->replace_state =
BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED;
@ -1062,7 +1062,7 @@ static int btrfs_dev_replace_kthread(void *data)
ret = btrfs_dev_replace_finishing(fs_info, ret);
WARN_ON(ret && ret != -ECANCELED);
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
btrfs_exclop_finish(fs_info);
return 0;
}

View File

@ -10015,14 +10015,14 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
/*
* Balance or device remove/replace/resize can move stuff around from
* under us. The EXCL_OP flag makes sure they aren't running/won't run
* concurrently while we are mapping the swap extents, and
* fs_info->swapfile_pins prevents them from running while the swap file
* is active and moving the extents. Note that this also prevents a
* concurrent device add which isn't actually necessary, but it's not
* under us. The exclop protection makes sure they aren't running/won't
* run concurrently while we are mapping the swap extents, and
* fs_info->swapfile_pins prevents them from running while the swap
* file is active and moving the extents. Note that this also prevents
* a concurrent device add which isn't actually necessary, but it's not
* really worth the trouble to allow it.
*/
if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_SWAP_ACTIVATE)) {
btrfs_warn(fs_info,
"cannot activate swapfile while exclusive operation is running");
return -EBUSY;
@ -10168,7 +10168,7 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
if (ret)
btrfs_swap_deactivate(file);
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
btrfs_exclop_finish(fs_info);
if (ret)
return ret;

View File

@ -378,6 +378,17 @@ static int check_xflags(unsigned int flags)
return 0;
}
/*
 * Try to claim the filesystem-wide exclusive operation slot.
 *
 * @fs_info: filesystem whose exclusive_operation field is updated
 * @type:    operation to record as running
 *
 * The slot is taken with a single compare-and-exchange: @type is stored only
 * if the field currently holds BTRFS_EXCLOP_NONE.
 *
 * Return: true if the slot was free and now records @type, false if another
 * exclusive operation was already recorded (the field is left untouched).
 */
bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
			enum btrfs_exclusive_operation type)
{
	unsigned long previous;

	previous = cmpxchg(&fs_info->exclusive_operation,
			   BTRFS_EXCLOP_NONE, type);

	/* cmpxchg hands back the old value; NONE means the swap took place. */
	return previous == BTRFS_EXCLOP_NONE;
}
/*
 * Release the filesystem-wide exclusive operation slot.
 *
 * @fs_info: filesystem whose exclusive_operation field is reset
 *
 * Unconditionally stores BTRFS_EXCLOP_NONE; the previous value is not
 * inspected, so this does not verify which operation (if any) was recorded.
 * NOTE(review): callers are presumably expected to invoke this only after a
 * successful btrfs_exclop_start() - confirm against all call sites.
 */
void btrfs_exclop_finish(struct btrfs_fs_info *fs_info)
{
/* Single marked store so the field is written in one access */
WRITE_ONCE(fs_info->exclusive_operation, BTRFS_EXCLOP_NONE);
}
/*
* Set the xflags from the internal inode flags. The remaining items of fsxattr
* are zeroed.
@ -1639,7 +1650,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
if (ret)
return ret;
if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_RESIZE)) {
mnt_drop_write_file(file);
return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
}
@ -1753,7 +1764,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
out_free:
kfree(vol_args);
out:
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
btrfs_exclop_finish(fs_info);
mnt_drop_write_file(file);
return ret;
}
@ -3127,7 +3138,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_ADD))
return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
vol_args = memdup_user(arg, sizeof(*vol_args));
@ -3144,7 +3155,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
kfree(vol_args);
out:
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
btrfs_exclop_finish(fs_info);
return ret;
}
@ -3173,7 +3184,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
goto out;
}
if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_REMOVE)) {
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
goto out;
}
@ -3184,7 +3195,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
ret = btrfs_rm_device(fs_info, vol_args->name, 0);
}
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
btrfs_exclop_finish(fs_info);
if (!ret) {
if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID)
@ -3215,7 +3226,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
if (ret)
return ret;
if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_REMOVE)) {
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
goto out_drop_write;
}
@ -3233,7 +3244,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
btrfs_info(fs_info, "disk deleted %s", vol_args->name);
kfree(vol_args);
out:
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
btrfs_exclop_finish(fs_info);
out_drop_write:
mnt_drop_write_file(file);
@ -3737,11 +3748,11 @@ static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info,
ret = -EROFS;
goto out;
}
if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_REPLACE)) {
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
} else {
ret = btrfs_dev_replace_by_ioctl(fs_info, p);
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
btrfs_exclop_finish(fs_info);
}
break;
case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS:
@ -3952,7 +3963,7 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
return ret;
again:
if (!test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
if (btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
mutex_lock(&fs_info->balance_mutex);
need_unlock = true;
goto locked;
@ -3998,7 +4009,6 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
}
locked:
BUG_ON(!test_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
if (arg) {
bargs = memdup_user(arg, sizeof(*bargs));
@ -4053,10 +4063,10 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
do_balance:
/*
* Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP goes to
* btrfs_balance. bctl is freed in reset_balance_state, or, if
* restriper was paused all the way until unmount, in free_fs_info.
* The flag should be cleared after reset_balance_state.
* Ownership of bctl and exclusive operation goes to btrfs_balance.
* bctl is freed in reset_balance_state, or, if restriper was paused
* all the way until unmount, in free_fs_info. The flag should be
* cleared after reset_balance_state.
*/
need_unlock = false;
@ -4075,7 +4085,7 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
out_unlock:
mutex_unlock(&fs_info->balance_mutex);
if (need_unlock)
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
btrfs_exclop_finish(fs_info);
out:
mnt_drop_write_file(file);
return ret;

View File

@ -291,8 +291,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
* balance_mutex
*
*
* Exclusive operations, BTRFS_FS_EXCL_OP
* ======================================
* Exclusive operations
* ====================
*
* Maintains the exclusivity of the following operations that apply to the
* whole filesystem and cannot run in parallel.
@ -318,11 +318,11 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
* - system power-cycle and filesystem mounted as read-only
* - filesystem or device errors leading to forced read-only
*
* BTRFS_FS_EXCL_OP flag is set and cleared using atomic operations.
* During the course of Paused state, the BTRFS_FS_EXCL_OP remains set.
* The status of exclusive operation is set and cleared atomically.
* During the course of Paused state, fs_info::exclusive_operation remains set.
* A device operation in Paused or Running state can be canceled or resumed
* either by ioctl (Balance only) or when remounted as read-write.
* BTRFS_FS_EXCL_OP flag is cleared when the device operation is canceled or
* The exclusive status is cleared when the device operation is canceled or
* completed.
*/
@ -4033,7 +4033,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
/*
* rw_devices will not change at the moment, device add/delete/replace
* are excluded by EXCL_OP
* are exclusive
*/
num_devices = fs_info->fs_devices->rw_devices;
@ -4169,7 +4169,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
balance_need_close(fs_info)) {
reset_balance_state(fs_info);
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
btrfs_exclop_finish(fs_info);
}
wake_up(&fs_info->balance_wait_q);
@ -4180,7 +4180,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
reset_balance_state(fs_info);
else
kfree(bctl);
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
btrfs_exclop_finish(fs_info);
return ret;
}
@ -4282,7 +4282,7 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
* is in a paused state and must have fs_info::balance_ctl properly
* set up.
*/
if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE))
btrfs_warn(fs_info,
"balance: cannot set exclusive op status, resume manually");
@ -4364,7 +4364,7 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
if (fs_info->balance_ctl) {
reset_balance_state(fs_info);
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
btrfs_exclop_finish(fs_info);
btrfs_info(fs_info, "balance: canceled");
}
}