md: set MD_CHANGE_PENDING in a atomic region
Some code waits for a metadata update by:

1. flagging that it is needed (MD_CHANGE_DEVS or MD_CHANGE_CLEAN)
2. setting MD_CHANGE_PENDING and waking the management thread
3. waiting for MD_CHANGE_PENDING to be cleared

If the first two are done without locking, the code in md_update_sb()
which checks if it needs to repeat might test if an update is needed
before step 1, then clear MD_CHANGE_PENDING after step 2, resulting
in the wait returning early.

So make sure all places that set MD_CHANGE_PENDING are atomic, and
bit_clear_unless (suggested by Neil) is introduced for the purpose.

Cc: Martin Kepplinger <martink@posteo.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: <linux-kernel@vger.kernel.org>
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
commit 85ad1d13ee (parent fe67d19a2d)
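To make the race concrete before the diff: below is a minimal user-space model in C11 atomics. It is a sketch, not the kernel implementation; set_mask_bits_model, the flag names, and their bit values are hypothetical stand-ins for the kernel's set_mask_bits() and the MD_CHANGE_* bits. It shows why publishing "update needed" and "update pending" in one atomic step closes the window described above.

/*
 * Hypothetical user-space model of set_mask_bits(); C11 atomics stand in
 * for the kernel's ACCESS_ONCE()/cmpxchg().  Flag values are made up.
 */
#include <stdatomic.h>
#include <stdio.h>

#define CHANGE_DEVS     (1u << 0)   /* "device metadata update needed" */
#define CHANGE_PENDING  (1u << 2)   /* "a waiter is blocked on the update" */

static atomic_uint flags;

/* Clear 'clear' and set 'set' in one atomic step. */
static void set_mask_bits_model(atomic_uint *ptr, unsigned clear, unsigned set)
{
        unsigned old = atomic_load(ptr);

        /* on failure the exchange refreshes 'old'; re-derive and retry */
        while (!atomic_compare_exchange_weak(ptr, &old, (old & ~clear) | set))
                ;
}

int main(void)
{
        /*
         * Buggy pattern (two separate steps):
         *      set_bit(MD_CHANGE_DEVS, ...);    <- md_update_sb() may check
         *                                          for pending work here...
         *      set_bit(MD_CHANGE_PENDING, ...); <- ...then clear PENDING,
         * so the waiter returns before its update hits the superblock.
         * Fixed pattern: both bits become visible together.
         */
        set_mask_bits_model(&flags, 0, CHANGE_DEVS | CHANGE_PENDING);
        printf("flags = %#x\n", (unsigned)atomic_load(&flags));
        return 0;
}

Note that set_mask_bits() itself predates this patch; the hunks below only switch callers to it. The new primitive is bit_clear_unless(), added to include/linux/bitops.h at the end of the diff.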
diff --git a/drivers/md/md.c b/drivers/md/md.c
@@ -2295,12 +2295,16 @@ void md_update_sb(struct mddev *mddev, int force_change)
 	if (mddev_is_clustered(mddev)) {
 		if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
 			force_change = 1;
+		if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
+			nospares = 1;
 		ret = md_cluster_ops->metadata_update_start(mddev);
 		/* Has someone else has updated the sb */
 		if (!does_sb_need_changing(mddev)) {
 			if (ret == 0)
 				md_cluster_ops->metadata_update_cancel(mddev);
-			clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+			bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
+					 BIT(MD_CHANGE_DEVS) |
+					 BIT(MD_CHANGE_CLEAN));
 			return;
 		}
 	}
@@ -2434,15 +2438,11 @@ void md_update_sb(struct mddev *mddev, int force_change)
 	if (mddev_is_clustered(mddev) && ret == 0)
 		md_cluster_ops->metadata_update_finish(mddev);
 
-	spin_lock(&mddev->lock);
 	if (mddev->in_sync != sync_req ||
-	    test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
+	    !bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
+			      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_CLEAN)))
 		/* have to write it out again */
-		spin_unlock(&mddev->lock);
 		goto repeat;
-	}
-	clear_bit(MD_CHANGE_PENDING, &mddev->flags);
-	spin_unlock(&mddev->lock);
 	wake_up(&mddev->sb_wait);
 	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
 		sysfs_notify(&mddev->kobj, NULL, "sync_completed");
@@ -8147,18 +8147,18 @@ void md_do_sync(struct md_thread *thread)
 		}
 	}
  skip:
-	set_bit(MD_CHANGE_DEVS, &mddev->flags);
-
 	if (mddev_is_clustered(mddev) &&
 	    ret == 0) {
 		/* set CHANGE_PENDING here since maybe another
 		 * update is needed, so other nodes are informed */
-		set_bit(MD_CHANGE_PENDING, &mddev->flags);
+		set_mask_bits(&mddev->flags, 0,
+			      BIT(MD_CHANGE_PENDING) | BIT(MD_CHANGE_DEVS));
 		md_wakeup_thread(mddev->thread);
 		wait_event(mddev->sb_wait,
 			   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
 		md_cluster_ops->resync_finish(mddev);
-	}
+	} else
+		set_bit(MD_CHANGE_DEVS, &mddev->flags);
 
 	spin_lock(&mddev->lock);
 	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@@ -8550,6 +8550,7 @@ EXPORT_SYMBOL(md_finish_reshape);
 int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
 		       int is_new)
 {
+	struct mddev *mddev = rdev->mddev;
 	int rv;
 	if (is_new)
 		s += rdev->new_data_offset;
@@ -8559,8 +8560,8 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
 	if (rv == 0) {
 		/* Make sure they get written out promptly */
 		sysfs_notify_dirent_safe(rdev->sysfs_state);
-		set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
-		set_bit(MD_CHANGE_PENDING, &rdev->mddev->flags);
+		set_mask_bits(&mddev->flags, 0,
+			      BIT(MD_CHANGE_CLEAN) | BIT(MD_CHANGE_PENDING));
 		md_wakeup_thread(rdev->mddev->thread);
 		return 1;
 	} else
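In the second md.c hunk above, the repeat check and the PENDING clear become one atomic decision: if MD_CHANGE_DEVS or MD_CHANGE_CLEAN was set again while the superblock was being written, bit_clear_unless() leaves PENDING alone and returns false, and md_update_sb() loops back to repeat, so no waiter is released early. Below is a hedged two-thread sketch of that protocol; everything in it (waiter, md_thread, the *_model helpers, and the busy-waiting that stands in for wait_event()/md_wakeup_thread()) is illustrative, not kernel code.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define CHANGE_DEVS     (1u << 0)
#define CHANGE_PENDING  (1u << 2)

static atomic_uint flags;

static void set_mask_bits_model(atomic_uint *p, unsigned clear, unsigned set)
{
        unsigned old = atomic_load(p);

        while (!atomic_compare_exchange_weak(p, &old, (old & ~clear) | set))
                ;
}

/* Clear 'clear' only if no 'test' bit is set; 1 = cleared, 0 = refused. */
static int bit_clear_unless_model(atomic_uint *p, unsigned clear, unsigned test)
{
        unsigned old = atomic_load(p);

        do {
                if (old & test)
                        return 0;       /* a new request raced in */
        } while (!atomic_compare_exchange_weak(p, &old, old & ~clear));
        return 1;
}

/* cf. raid1_error() etc.: flag the update and wait for it to land. */
static void *waiter(void *arg)
{
        set_mask_bits_model(&flags, 0, CHANGE_DEVS | CHANGE_PENDING);
        while (atomic_load(&flags) & CHANGE_PENDING)
                ;       /* the kernel sleeps in wait_event() instead */
        puts("waiter: update reached the superblock");
        return arg;
}

/* cf. the md_update_sb() tail: write, then clear PENDING or repeat. */
static void *md_thread(void *arg)
{
        while (!(atomic_load(&flags) & CHANGE_PENDING))
                ;       /* the kernel thread is woken by md_wakeup_thread() */
        do {
                atomic_fetch_and(&flags, ~CHANGE_DEVS);
                puts("md: superblock written");
        } while (!bit_clear_unless_model(&flags, CHANGE_PENDING, CHANGE_DEVS));
        return arg;
}

int main(void)
{
        pthread_t w, m;

        pthread_create(&w, NULL, waiter, NULL);
        pthread_create(&m, NULL, md_thread, NULL);
        pthread_join(w, NULL);
        pthread_join(m, NULL);
        return 0;
}

Build with cc -pthread. The demo only exercises the fixed protocol; reproducing the original race would additionally need an update already in flight when the waiter sets its bits.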
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
@@ -1474,8 +1474,8 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
 	 * if recovery is running, make sure it aborts.
 	 */
 	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-	set_bit(MD_CHANGE_DEVS, &mddev->flags);
-	set_bit(MD_CHANGE_PENDING, &mddev->flags);
+	set_mask_bits(&mddev->flags, 0,
+		      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
 	printk(KERN_ALERT
 	       "md/raid1:%s: Disk failure on %s, disabling device.\n"
 	       "md/raid1:%s: Operation continuing on %d devices.\n",
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
@@ -1102,8 +1102,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 		    bio->bi_iter.bi_sector < conf->reshape_progress))) {
 		/* Need to update reshape_position in metadata */
 		mddev->reshape_position = conf->reshape_progress;
-		set_bit(MD_CHANGE_DEVS, &mddev->flags);
-		set_bit(MD_CHANGE_PENDING, &mddev->flags);
+		set_mask_bits(&mddev->flags, 0,
+			      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
 		md_wakeup_thread(mddev->thread);
 		wait_event(mddev->sb_wait,
 			   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
@@ -1591,8 +1591,8 @@ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
 	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 	set_bit(Blocked, &rdev->flags);
 	set_bit(Faulty, &rdev->flags);
-	set_bit(MD_CHANGE_DEVS, &mddev->flags);
-	set_bit(MD_CHANGE_PENDING, &mddev->flags);
+	set_mask_bits(&mddev->flags, 0,
+		      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
 	spin_unlock_irqrestore(&conf->device_lock, flags);
 	printk(KERN_ALERT
 	       "md/raid10:%s: Disk failure on %s, disabling device.\n"
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
@@ -712,8 +712,8 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
 	 * in_teardown check workaround this issue.
 	 */
 	if (!log->in_teardown) {
-		set_bit(MD_CHANGE_DEVS, &mddev->flags);
-		set_bit(MD_CHANGE_PENDING, &mddev->flags);
+		set_mask_bits(&mddev->flags, 0,
+			      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
 		md_wakeup_thread(mddev->thread);
 		wait_event(mddev->sb_wait,
 			   !test_bit(MD_CHANGE_PENDING, &mddev->flags) ||
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
@@ -2514,8 +2514,8 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
 
 	set_bit(Blocked, &rdev->flags);
 	set_bit(Faulty, &rdev->flags);
-	set_bit(MD_CHANGE_DEVS, &mddev->flags);
-	set_bit(MD_CHANGE_PENDING, &mddev->flags);
+	set_mask_bits(&mddev->flags, 0,
+		      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
 	printk(KERN_ALERT
 	       "md/raid:%s: Disk failure on %s, disabling device.\n"
 	       "md/raid:%s: Operation continuing on %d devices.\n",
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
@@ -227,6 +227,22 @@ static inline unsigned long __ffs64(u64 word)
 })
 #endif
 
+#ifndef bit_clear_unless
+#define bit_clear_unless(ptr, _clear, _test)	\
+({								\
+	const typeof(*ptr) clear = (_clear), test = (_test);	\
+	typeof(*ptr) old, new;					\
+								\
+	do {							\
+		old = ACCESS_ONCE(*ptr);			\
+		new = old & ~clear;				\
+	} while (!(old & test) &&				\
+		 cmpxchg(ptr, old, new) != old);		\
+								\
+	!(old & test);						\
+})
+#endif
+
 #ifndef find_last_bit
 /**
  * find_last_bit - find the last set bit in a memory region
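A note on reading the macro: the loop exits either because a test bit was observed (nothing is cleared) or because the cmpxchg() landed, and the final expression !(old & test) is the macro's value, so true means the clear actually happened. A small user-space probe of both outcomes follows; bit_clear_unless_model and the B_* values are made-up stand-ins for the kernel primitives and the BIT(MD_CHANGE_*) masks.

#include <stdatomic.h>
#include <stdio.h>

#define B_DEVS     0x1u   /* stand-in for BIT(MD_CHANGE_DEVS) */
#define B_CLEAN    0x2u   /* stand-in for BIT(MD_CHANGE_CLEAN) */
#define B_PENDING  0x4u   /* stand-in for BIT(MD_CHANGE_PENDING) */

static int bit_clear_unless_model(atomic_uint *p, unsigned clear, unsigned test)
{
        unsigned old = atomic_load(p);

        do {
                if (old & test)
                        return 0;       /* refused: a test bit is set */
        } while (!atomic_compare_exchange_weak(p, &old, old & ~clear));
        return 1;                       /* cleared */
}

int main(void)
{
        atomic_uint f = B_PENDING;

        printf("cleared=%d flags=%#x\n",        /* -> cleared=1 flags=0 */
               bit_clear_unless_model(&f, B_PENDING, B_DEVS | B_CLEAN),
               (unsigned)atomic_load(&f));

        atomic_store(&f, B_PENDING | B_DEVS);
        printf("cleared=%d flags=%#x\n",        /* -> cleared=0 flags=0x5 */
               bit_clear_unless_model(&f, B_PENDING, B_DEVS | B_CLEAN),
               (unsigned)atomic_load(&f));
        return 0;
}

Because the whole "new work arrived vs. safe to release waiters" decision collapses into a single compare-exchange, md_update_sb() no longer needs mddev->lock around that check, which is why the second md.c hunk drops the spin_lock()/spin_unlock() pair.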