md updates for 4.2

A mixed bag
  - a few bug fixes
  - some performance improvement that decrease lock contention
  - some clean-up
 
 Nothing major.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 iQIcBAABCAAGBQJVi6weAAoJEDnsnt1WYoG50CsP/RqFbZicRSIvzXUURwP+yCP0
 3YZuURj4IXC6Cy/HLX+bZoj1p/b+GIRsZ72fWFJrd2LheaAI6WojCCLlnmXUtI/Y
 LIppF8/A2hfCNbF9cILByvrbzfndeEGK8kvootBDpvD0jlYiGePPAMQY2zx0MAyb
 T4yJ/KiziLniP6x7vqZrQ6I1MRVjeanN6RWXktFtixMpNOKUJe3PiZbUz4VDIrHR
 DaiHCbMjvRIkUWgNY8HmijEt+c8AYia7muqLj359dy2xF1hlUIdCx+61cgFD1zd8
 enKDH3xp+3B9BEgHe+AtxTAzpqSgU93tdhUjGcy/orA+yYjAAcA4ifngrzfE3VKb
 kwQgPh2JvUrubavrcto0hthS5RldrCpDXebOM4aEq+7lDHCwrZ39Qio5+1F7TLt5
 A5E3Eb7dPRdp9T3LrluX8/f7bO/Wbmxvv/RwnSLTpnGQoBWIAqCpQ+e9ro446Gsx
 /phXv3tE78fKj88LgQY/mm8ICeCppmQGLrpmjk9bkaZzqFdzQoURVmPh8QPMuJB4
 iMHpOOKLzrUlW/23rRxaIKwPuFyxlNuLAvyA3ezsymGiZ+SqSeFCEm1jN64EfMCI
 39rpfZt2pcVVOZJ9YeuzZG9wpie96yGZgnVWlP3FPjqRpboXqmtHlYA6EMRtqDAy
 mjSiGDF2bxkT1/YcjELD
 =sXTI
 -----END PGP SIGNATURE-----

Merge tag 'md/4.2' of git://neil.brown.name/md

Pull md updates from Neil Brown:
 "A mixed bag

   - a few bug fixes
   - some performance improvements that decrease lock contention
   - some clean-up

  Nothing major"

* tag 'md/4.2' of git://neil.brown.name/md:
  md: clear Blocked flag on failed devices when array is read-only.
  md: unlock mddev_lock on an error path.
  md: clear mddev->private when it has been freed.
  md: fix a build warning
  md/raid5: ignore released_stripes check
  md/raid5: per hash value and exclusive wait_for_stripe
  md/raid5: split wait_for_stripe and introduce wait_for_quiescent
  wait: introduce wait_event_exclusive_cmd
  md: convert to kstrto*()
  md/raid10: make sync_request_write() call bio_copy_data()
This commit is contained in:
Linus Torvalds 2015-06-29 11:10:56 -07:00
commit 6aaf0da872
5 changed files with 146 additions and 101 deletions

View File

@ -2628,13 +2628,14 @@ errors_show(struct md_rdev *rdev, char *page)
static ssize_t
errors_store(struct md_rdev *rdev, const char *buf, size_t len)
{
char *e;
unsigned long n = simple_strtoul(buf, &e, 10);
if (*buf && (*e == 0 || *e == '\n')) {
atomic_set(&rdev->corrected_errors, n);
return len;
}
return -EINVAL;
unsigned int n;
int rv;
rv = kstrtouint(buf, 10, &n);
if (rv < 0)
return rv;
atomic_set(&rdev->corrected_errors, n);
return len;
}
static struct rdev_sysfs_entry rdev_errors =
__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
@ -2651,13 +2652,16 @@ slot_show(struct md_rdev *rdev, char *page)
static ssize_t
slot_store(struct md_rdev *rdev, const char *buf, size_t len)
{
char *e;
int slot;
int err;
int slot = simple_strtoul(buf, &e, 10);
if (strncmp(buf, "none", 4)==0)
slot = -1;
else if (e==buf || (*e && *e!= '\n'))
return -EINVAL;
else {
err = kstrtouint(buf, 10, (unsigned int *)&slot);
if (err < 0)
return err;
}
if (rdev->mddev->pers && slot == -1) {
/* Setting 'slot' on an active array requires also
* updating the 'rd%d' link, and communicating
@ -3542,12 +3546,12 @@ layout_show(struct mddev *mddev, char *page)
static ssize_t
layout_store(struct mddev *mddev, const char *buf, size_t len)
{
char *e;
unsigned long n = simple_strtoul(buf, &e, 10);
unsigned int n;
int err;
if (!*buf || (*e && *e != '\n'))
return -EINVAL;
err = kstrtouint(buf, 10, &n);
if (err < 0)
return err;
err = mddev_lock(mddev);
if (err)
return err;
@ -3591,12 +3595,12 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks);
static ssize_t
raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
{
char *e;
unsigned int n;
int err;
unsigned long n = simple_strtoul(buf, &e, 10);
if (!*buf || (*e && *e != '\n'))
return -EINVAL;
err = kstrtouint(buf, 10, &n);
if (err < 0)
return err;
err = mddev_lock(mddev);
if (err)
@ -3643,12 +3647,12 @@ chunk_size_show(struct mddev *mddev, char *page)
static ssize_t
chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
{
unsigned long n;
int err;
char *e;
unsigned long n = simple_strtoul(buf, &e, 10);
if (!*buf || (*e && *e != '\n'))
return -EINVAL;
err = kstrtoul(buf, 10, &n);
if (err < 0)
return err;
err = mddev_lock(mddev);
if (err)
@ -3686,19 +3690,24 @@ resync_start_show(struct mddev *mddev, char *page)
static ssize_t
resync_start_store(struct mddev *mddev, const char *buf, size_t len)
{
unsigned long long n;
int err;
char *e;
unsigned long long n = simple_strtoull(buf, &e, 10);
if (cmd_match(buf, "none"))
n = MaxSector;
else {
err = kstrtoull(buf, 10, &n);
if (err < 0)
return err;
if (n != (sector_t)n)
return -EINVAL;
}
err = mddev_lock(mddev);
if (err)
return err;
if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
err = -EBUSY;
else if (cmd_match(buf, "none"))
n = MaxSector;
else if (!*buf || (*e && *e != '\n'))
err = -EINVAL;
if (!err) {
mddev->recovery_cp = n;
@ -3934,14 +3943,14 @@ max_corrected_read_errors_show(struct mddev *mddev, char *page) {
static ssize_t
max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
{
char *e;
unsigned long n = simple_strtoul(buf, &e, 10);
unsigned int n;
int rv;
if (*buf && (*e == 0 || *e == '\n')) {
atomic_set(&mddev->max_corr_read_errors, n);
return len;
}
return -EINVAL;
rv = kstrtouint(buf, 10, &n);
if (rv < 0)
return rv;
atomic_set(&mddev->max_corr_read_errors, n);
return len;
}
static struct md_sysfs_entry max_corr_read_errors =
@ -4003,8 +4012,10 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len)
else
rdev = md_import_device(dev, -1, -1);
if (IS_ERR(rdev))
if (IS_ERR(rdev)) {
mddev_unlock(mddev);
return PTR_ERR(rdev);
}
err = bind_rdev_to_array(rdev, mddev);
out:
if (err)
@ -4298,15 +4309,18 @@ sync_min_show(struct mddev *mddev, char *page)
static ssize_t
sync_min_store(struct mddev *mddev, const char *buf, size_t len)
{
int min;
char *e;
unsigned int min;
int rv;
if (strncmp(buf, "system", 6)==0) {
mddev->sync_speed_min = 0;
return len;
min = 0;
} else {
rv = kstrtouint(buf, 10, &min);
if (rv < 0)
return rv;
if (min == 0)
return -EINVAL;
}
min = simple_strtoul(buf, &e, 10);
if (buf == e || (*e && *e != '\n') || min <= 0)
return -EINVAL;
mddev->sync_speed_min = min;
return len;
}
@ -4324,15 +4338,18 @@ sync_max_show(struct mddev *mddev, char *page)
static ssize_t
sync_max_store(struct mddev *mddev, const char *buf, size_t len)
{
int max;
char *e;
unsigned int max;
int rv;
if (strncmp(buf, "system", 6)==0) {
mddev->sync_speed_max = 0;
return len;
max = 0;
} else {
rv = kstrtouint(buf, 10, &max);
if (rv < 0)
return rv;
if (max == 0)
return -EINVAL;
}
max = simple_strtoul(buf, &e, 10);
if (buf == e || (*e && *e != '\n') || max <= 0)
return -EINVAL;
mddev->sync_speed_max = max;
return len;
}
@ -4515,12 +4532,13 @@ suspend_lo_show(struct mddev *mddev, char *page)
static ssize_t
suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
{
char *e;
unsigned long long new = simple_strtoull(buf, &e, 10);
unsigned long long old;
unsigned long long old, new;
int err;
if (buf == e || (*e && *e != '\n'))
err = kstrtoull(buf, 10, &new);
if (err < 0)
return err;
if (new != (sector_t)new)
return -EINVAL;
err = mddev_lock(mddev);
@ -4557,12 +4575,13 @@ suspend_hi_show(struct mddev *mddev, char *page)
static ssize_t
suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
{
char *e;
unsigned long long new = simple_strtoull(buf, &e, 10);
unsigned long long old;
unsigned long long old, new;
int err;
if (buf == e || (*e && *e != '\n'))
err = kstrtoull(buf, 10, &new);
if (err < 0)
return err;
if (new != (sector_t)new)
return -EINVAL;
err = mddev_lock(mddev);
@ -4604,11 +4623,13 @@ static ssize_t
reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
{
struct md_rdev *rdev;
char *e;
unsigned long long new;
int err;
unsigned long long new = simple_strtoull(buf, &e, 10);
if (buf == e || (*e && *e != '\n'))
err = kstrtoull(buf, 10, &new);
if (err < 0)
return err;
if (new != (sector_t)new)
return -EINVAL;
err = mddev_lock(mddev);
if (err)
@ -5157,6 +5178,7 @@ int md_run(struct mddev *mddev)
mddev_detach(mddev);
if (mddev->private)
pers->free(mddev, mddev->private);
mddev->private = NULL;
module_put(pers->owner);
bitmap_destroy(mddev);
return err;
@ -5292,6 +5314,7 @@ static void md_clean(struct mddev *mddev)
mddev->changed = 0;
mddev->degraded = 0;
mddev->safemode = 0;
mddev->private = NULL;
mddev->merge_check_needed = 0;
mddev->bitmap_info.offset = 0;
mddev->bitmap_info.default_offset = 0;
@ -5364,6 +5387,7 @@ static void __md_stop(struct mddev *mddev)
mddev->pers = NULL;
spin_unlock(&mddev->lock);
pers->free(mddev, mddev->private);
mddev->private = NULL;
if (pers->sync_request && mddev->to_remove == NULL)
mddev->to_remove = &md_redundancy_group;
module_put(pers->owner);
@ -6373,7 +6397,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
mddev->ctime != info->ctime ||
mddev->level != info->level ||
/* mddev->layout != info->layout || */
!mddev->persistent != info->not_persistent||
mddev->persistent != !info->not_persistent ||
mddev->chunk_sectors != info->chunk_size >> 9 ||
/* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */
((state^info->state) & 0xfffffe00)
@ -8104,6 +8128,15 @@ void md_check_recovery(struct mddev *mddev)
int spares = 0;
if (mddev->ro) {
struct md_rdev *rdev;
if (!mddev->external && mddev->in_sync)
/* 'Blocked' flag not needed as failed devices
* will be recorded if array switched to read/write.
* Leaving it set will prevent the device
* from being removed.
*/
rdev_for_each(rdev, mddev)
clear_bit(Blocked, &rdev->flags);
/* On a read-only array we can:
* - remove failed devices
* - add already-in_sync devices if the array itself
@ -9011,13 +9044,7 @@ static int get_ro(char *buffer, struct kernel_param *kp)
}
static int set_ro(const char *val, struct kernel_param *kp)
{
char *e;
int num = simple_strtoul(val, &e, 10);
if (*val && (*e == '\0' || *e == '\n')) {
start_readonly = num;
return 0;
}
return -EINVAL;
return kstrtouint(val, 10, (unsigned int *)&start_readonly);
}
module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);

View File

@ -2099,17 +2099,10 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
tbio->bi_rw = WRITE;
tbio->bi_private = r10_bio;
tbio->bi_iter.bi_sector = r10_bio->devs[i].addr;
for (j=0; j < vcnt ; j++) {
tbio->bi_io_vec[j].bv_offset = 0;
tbio->bi_io_vec[j].bv_len = PAGE_SIZE;
memcpy(page_address(tbio->bi_io_vec[j].bv_page),
page_address(fbio->bi_io_vec[j].bv_page),
PAGE_SIZE);
}
tbio->bi_end_io = end_sync_write;
bio_copy_data(tbio, fbio);
d = r10_bio->devs[i].devnum;
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
atomic_inc(&r10_bio->remaining);
@ -2124,17 +2117,14 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
* that are active
*/
for (i = 0; i < conf->copies; i++) {
int j, d;
int d;
tbio = r10_bio->devs[i].repl_bio;
if (!tbio || !tbio->bi_end_io)
continue;
if (r10_bio->devs[i].bio->bi_end_io != end_sync_write
&& r10_bio->devs[i].bio != fbio)
for (j = 0; j < vcnt; j++)
memcpy(page_address(tbio->bi_io_vec[j].bv_page),
page_address(fbio->bi_io_vec[j].bv_page),
PAGE_SIZE);
bio_copy_data(tbio, fbio);
d = r10_bio->devs[i].devnum;
atomic_inc(&r10_bio->remaining);
md_sync_acct(conf->mirrors[d].replacement->bdev,

View File

@ -344,7 +344,8 @@ static void release_inactive_stripe_list(struct r5conf *conf,
int hash)
{
int size;
bool do_wakeup = false;
unsigned long do_wakeup = 0;
int i = 0;
unsigned long flags;
if (hash == NR_STRIPE_HASH_LOCKS) {
@ -365,15 +366,21 @@ static void release_inactive_stripe_list(struct r5conf *conf,
!list_empty(list))
atomic_dec(&conf->empty_inactive_list_nr);
list_splice_tail_init(list, conf->inactive_list + hash);
do_wakeup = true;
do_wakeup |= 1 << hash;
spin_unlock_irqrestore(conf->hash_locks + hash, flags);
}
size--;
hash--;
}
for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
if (do_wakeup & (1 << i))
wake_up(&conf->wait_for_stripe[i]);
}
if (do_wakeup) {
wake_up(&conf->wait_for_stripe);
if (atomic_read(&conf->active_stripes) == 0)
wake_up(&conf->wait_for_quiescent);
if (conf->retry_read_aligned)
md_wakeup_thread(conf->mddev->thread);
}
@ -667,15 +674,15 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
spin_lock_irq(conf->hash_locks + hash);
do {
wait_event_lock_irq(conf->wait_for_stripe,
wait_event_lock_irq(conf->wait_for_quiescent,
conf->quiesce == 0 || noquiesce,
*(conf->hash_locks + hash));
sh = __find_stripe(conf, sector, conf->generation - previous);
if (!sh) {
if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) {
sh = get_free_stripe(conf, hash);
if (!sh && llist_empty(&conf->released_stripes) &&
!test_bit(R5_DID_ALLOC, &conf->cache_state))
if (!sh && !test_bit(R5_DID_ALLOC,
&conf->cache_state))
set_bit(R5_ALLOC_MORE,
&conf->cache_state);
}
@ -684,14 +691,15 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
if (!sh) {
set_bit(R5_INACTIVE_BLOCKED,
&conf->cache_state);
wait_event_lock_irq(
conf->wait_for_stripe,
wait_event_exclusive_cmd(
conf->wait_for_stripe[hash],
!list_empty(conf->inactive_list + hash) &&
(atomic_read(&conf->active_stripes)
< (conf->max_nr_stripes * 3 / 4)
|| !test_bit(R5_INACTIVE_BLOCKED,
&conf->cache_state)),
*(conf->hash_locks + hash));
spin_unlock_irq(conf->hash_locks + hash),
spin_lock_irq(conf->hash_locks + hash));
clear_bit(R5_INACTIVE_BLOCKED,
&conf->cache_state);
} else {
@ -716,6 +724,9 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
}
} while (sh == NULL);
if (!list_empty(conf->inactive_list + hash))
wake_up(&conf->wait_for_stripe[hash]);
spin_unlock_irq(conf->hash_locks + hash);
return sh;
}
@ -2177,7 +2188,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
cnt = 0;
list_for_each_entry(nsh, &newstripes, lru) {
lock_device_hash_lock(conf, hash);
wait_event_cmd(conf->wait_for_stripe,
wait_event_exclusive_cmd(conf->wait_for_stripe[hash],
!list_empty(conf->inactive_list + hash),
unlock_device_hash_lock(conf, hash),
lock_device_hash_lock(conf, hash));
@ -4760,7 +4771,7 @@ static void raid5_align_endio(struct bio *bi, int error)
raid_bi, 0);
bio_endio(raid_bi, 0);
if (atomic_dec_and_test(&conf->active_aligned_reads))
wake_up(&conf->wait_for_stripe);
wake_up(&conf->wait_for_quiescent);
return;
}
@ -4855,7 +4866,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
align_bi->bi_iter.bi_sector += rdev->data_offset;
spin_lock_irq(&conf->device_lock);
wait_event_lock_irq(conf->wait_for_stripe,
wait_event_lock_irq(conf->wait_for_quiescent,
conf->quiesce == 0,
conf->device_lock);
atomic_inc(&conf->active_aligned_reads);
@ -5699,7 +5710,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
bio_endio(raid_bio, 0);
}
if (atomic_dec_and_test(&conf->active_aligned_reads))
wake_up(&conf->wait_for_stripe);
wake_up(&conf->wait_for_quiescent);
return handled;
}
@ -6433,7 +6444,10 @@ static struct r5conf *setup_conf(struct mddev *mddev)
goto abort;
spin_lock_init(&conf->device_lock);
seqcount_init(&conf->gen_lock);
init_waitqueue_head(&conf->wait_for_stripe);
init_waitqueue_head(&conf->wait_for_quiescent);
for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
init_waitqueue_head(&conf->wait_for_stripe[i]);
}
init_waitqueue_head(&conf->wait_for_overlap);
INIT_LIST_HEAD(&conf->handle_list);
INIT_LIST_HEAD(&conf->hold_list);
@ -7466,7 +7480,7 @@ static void raid5_quiesce(struct mddev *mddev, int state)
* active stripes can drain
*/
conf->quiesce = 2;
wait_event_cmd(conf->wait_for_stripe,
wait_event_cmd(conf->wait_for_quiescent,
atomic_read(&conf->active_stripes) == 0 &&
atomic_read(&conf->active_aligned_reads) == 0,
unlock_all_device_hash_locks_irq(conf),
@ -7480,7 +7494,7 @@ static void raid5_quiesce(struct mddev *mddev, int state)
case 0: /* re-enable writes */
lock_all_device_hash_locks_irq(conf);
conf->quiesce = 0;
wake_up(&conf->wait_for_stripe);
wake_up(&conf->wait_for_quiescent);
wake_up(&conf->wait_for_overlap);
unlock_all_device_hash_locks_irq(conf);
break;

View File

@ -511,7 +511,8 @@ struct r5conf {
struct list_head inactive_list[NR_STRIPE_HASH_LOCKS];
atomic_t empty_inactive_list_nr;
struct llist_head released_stripes;
wait_queue_head_t wait_for_stripe;
wait_queue_head_t wait_for_quiescent;
wait_queue_head_t wait_for_stripe[NR_STRIPE_HASH_LOCKS];
wait_queue_head_t wait_for_overlap;
unsigned long cache_state;
#define R5_INACTIVE_BLOCKED 1 /* release of inactive stripes blocked,

View File

@ -358,6 +358,19 @@ do { \
__ret; \
})
/*
 * Sleeping path used by wait_event_exclusive_cmd().
 *
 * Hands @wq and @condition to ___wait_event() with TASK_UNINTERRUPTIBLE,
 * executing @cmd1 immediately before schedule() and @cmd2 immediately
 * after it.  The only difference from __wait_event_cmd() is the 1 passed
 * as the fourth ___wait_event() argument (the exclusive flag).  The value
 * produced by ___wait_event() is discarded via the (void) cast.
 */
#define __wait_event_exclusive_cmd(wq, condition, cmd1, cmd2) \
(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 1, 0, \
cmd1; schedule(); cmd2)
/*
 * Just like wait_event_cmd(), except it sets exclusive flag
 *
 * @wq:        the waitqueue to wait on
 * @condition: expression to wait for; it is tested once up front and the
 *             macro does nothing further if it already holds
 * @cmd1:      statement executed before schedule()
 * @cmd2:      statement executed after schedule()
 *
 * @cmd1/@cmd2 are typically an unlock/lock pair so that the caller's lock
 * is not held across schedule().
 */
#define wait_event_exclusive_cmd(wq, condition, cmd1, cmd2) \
do { \
if (condition) \
break; \
__wait_event_exclusive_cmd(wq, condition, cmd1, cmd2); \
} while (0)
/*
 * Sleeping path for the non-exclusive command variant: passes @wq and
 * @condition to ___wait_event() with TASK_UNINTERRUPTIBLE and 0 as the
 * fourth (exclusive) argument, executing @cmd1 before schedule() and
 * @cmd2 after it.  The ___wait_event() result is discarded.
 */
#define __wait_event_cmd(wq, condition, cmd1, cmd2) \
(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
cmd1; schedule(); cmd2)