diff --git a/drivers/md/md.c b/drivers/md/md.c index 24638ccedce4..daa885ee4d60 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1826,8 +1826,15 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_BITMAP)) rdev->saved_raid_disk = -1; - } else - set_bit(In_sync, &rdev->flags); + } else { + /* + * If the array is FROZEN, then the device can't + * be in_sync with rest of array. + */ + if (!test_bit(MD_RECOVERY_FROZEN, + &mddev->recovery)) + set_bit(In_sync, &rdev->flags); + } rdev->raid_disk = role; break; } @@ -3664,11 +3671,7 @@ int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale) return -EINVAL; if (decimals < 0) decimals = 0; - while (decimals < scale) { - result *= 10; - decimals ++; - } - *res = result; + *res = result * int_pow(10, scale - decimals); return 0; } @@ -5182,6 +5185,34 @@ static struct md_sysfs_entry md_consistency_policy = __ATTR(consistency_policy, S_IRUGO | S_IWUSR, consistency_policy_show, consistency_policy_store); +static ssize_t fail_last_dev_show(struct mddev *mddev, char *page) +{ + return sprintf(page, "%d\n", mddev->fail_last_dev); +} + +/* + * Setting fail_last_dev to true to allow last device to be forcibly removed + * from RAID1/RAID10. + */ +static ssize_t +fail_last_dev_store(struct mddev *mddev, const char *buf, size_t len) +{ + int ret; + bool value; + + ret = kstrtobool(buf, &value); + if (ret) + return ret; + + if (value != mddev->fail_last_dev) + mddev->fail_last_dev = value; + + return len; +} +static struct md_sysfs_entry md_fail_last_dev = +__ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show, + fail_last_dev_store); + static struct attribute *md_default_attrs[] = { &md_level.attr, &md_layout.attr, @@ -5198,6 +5229,7 @@ static struct attribute *md_default_attrs[] = { &md_array_size.attr, &max_corr_read_errors.attr, &md_consistency_policy.attr, + &md_fail_last_dev.attr, NULL, }; @@ -9043,7 +9075,8 @@ void md_reap_sync_thread(struct mddev *mddev) /* resync has finished, collect result */ md_unregister_thread(&mddev->sync_thread); if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) && - !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { + !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && + mddev->degraded != mddev->raid_disks) { /* success...*/ /* activate any spares */ if (mddev->pers->spare_active(mddev)) { diff --git a/drivers/md/md.h b/drivers/md/md.h index 10f98200e2f8..b742659150a2 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -487,6 +487,7 @@ struct mddev { unsigned int good_device_nr; /* good device num within cluster raid */ bool has_superblocks:1; + bool fail_last_dev:1; }; enum recovery_flags { diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 34e26834ad28..6ea4f2679b78 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -447,19 +447,21 @@ static void raid1_end_write_request(struct bio *bio) /* We never try FailFast to WriteMostly devices */ !test_bit(WriteMostly, &rdev->flags)) { md_error(r1_bio->mddev, rdev); - if (!test_bit(Faulty, &rdev->flags)) - /* This is the only remaining device, - * We need to retry the write without - * FailFast - */ - set_bit(R1BIO_WriteError, &r1_bio->state); - else { - /* Finished with this branch */ - r1_bio->bios[mirror] = NULL; - to_put = bio; - } - } else + } + + /* + * When the device is faulty, it is not necessary to + * handle write error. + * For failfast, this is the only remaining device, + * We need to retry the write without FailFast. + */ + if (!test_bit(Faulty, &rdev->flags)) set_bit(R1BIO_WriteError, &r1_bio->state); + else { + /* Finished with this branch */ + r1_bio->bios[mirror] = NULL; + to_put = bio; + } } else { /* * Set R1BIO_Uptodate in our master bio, so that we @@ -872,8 +874,11 @@ static void flush_pending_writes(struct r1conf *conf) * backgroup IO calls must call raise_barrier. Once that returns * there is no normal IO happeing. It must arrange to call * lower_barrier when the particular background IO completes. + * + * If resync/recovery is interrupted, returns -EINTR; + * Otherwise, returns 0. */ -static sector_t raise_barrier(struct r1conf *conf, sector_t sector_nr) +static int raise_barrier(struct r1conf *conf, sector_t sector_nr) { int idx = sector_to_idx(sector_nr); @@ -1612,12 +1617,12 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev) /* * If it is not operational, then we have already marked it as dead - * else if it is the last working disks, ignore the error, let the - * next level up know. + * else if it is the last working disks with "fail_last_dev == false", + * ignore the error, let the next level up know. * else mark the drive as failed */ spin_lock_irqsave(&conf->device_lock, flags); - if (test_bit(In_sync, &rdev->flags) + if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev && (conf->raid_disks - mddev->degraded) == 1) { /* * Don't fail the drive, act as though we were just a @@ -1901,6 +1906,22 @@ static void abort_sync_write(struct mddev *mddev, struct r1bio *r1_bio) } while (sectors_to_go > 0); } +static void put_sync_write_buf(struct r1bio *r1_bio, int uptodate) +{ + if (atomic_dec_and_test(&r1_bio->remaining)) { + struct mddev *mddev = r1_bio->mddev; + int s = r1_bio->sectors; + + if (test_bit(R1BIO_MadeGood, &r1_bio->state) || + test_bit(R1BIO_WriteError, &r1_bio->state)) + reschedule_retry(r1_bio); + else { + put_buf(r1_bio); + md_done_sync(mddev, s, uptodate); + } + } +} + static void end_sync_write(struct bio *bio) { int uptodate = !bio->bi_status; @@ -1927,16 +1948,7 @@ static void end_sync_write(struct bio *bio) ) set_bit(R1BIO_MadeGood, &r1_bio->state); - if (atomic_dec_and_test(&r1_bio->remaining)) { - int s = r1_bio->sectors; - if (test_bit(R1BIO_MadeGood, &r1_bio->state) || - test_bit(R1BIO_WriteError, &r1_bio->state)) - reschedule_retry(r1_bio); - else { - put_buf(r1_bio); - md_done_sync(mddev, s, uptodate); - } - } + put_sync_write_buf(r1_bio, uptodate); } static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector, @@ -2219,17 +2231,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio) generic_make_request(wbio); } - if (atomic_dec_and_test(&r1_bio->remaining)) { - /* if we're here, all write(s) have completed, so clean up */ - int s = r1_bio->sectors; - if (test_bit(R1BIO_MadeGood, &r1_bio->state) || - test_bit(R1BIO_WriteError, &r1_bio->state)) - reschedule_retry(r1_bio); - else { - put_buf(r1_bio); - md_done_sync(mddev, s, 1); - } - } + put_sync_write_buf(r1_bio, 1); } /* diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 8a1354a08a1a..299c7b1c9718 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -465,19 +465,21 @@ static void raid10_end_write_request(struct bio *bio) if (test_bit(FailFast, &rdev->flags) && (bio->bi_opf & MD_FAILFAST)) { md_error(rdev->mddev, rdev); - if (!test_bit(Faulty, &rdev->flags)) - /* This is the only remaining device, - * We need to retry the write without - * FailFast - */ - set_bit(R10BIO_WriteError, &r10_bio->state); - else { - r10_bio->devs[slot].bio = NULL; - to_put = bio; - dec_rdev = 1; - } - } else + } + + /* + * When the device is faulty, it is not necessary to + * handle write error. + * For failfast, this is the only remaining device, + * We need to retry the write without FailFast. + */ + if (!test_bit(Faulty, &rdev->flags)) set_bit(R10BIO_WriteError, &r10_bio->state); + else { + r10_bio->devs[slot].bio = NULL; + to_put = bio; + dec_rdev = 1; + } } } else { /* @@ -1638,12 +1640,12 @@ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev) /* * If it is not operational, then we have already marked it as dead - * else if it is the last working disks, ignore the error, let the - * next level up know. + * else if it is the last working disks with "fail_last_dev == false", + * ignore the error, let the next level up know. * else mark the drive as failed */ spin_lock_irqsave(&conf->device_lock, flags); - if (test_bit(In_sync, &rdev->flags) + if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev && !enough(conf, rdev->raid_disk)) { /* * Don't fail the drive, just return an IO error. diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 3de4e13bde98..59cafafd5a5d 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2558,7 +2558,9 @@ static void raid5_end_read_request(struct bio * bi) && !test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) retry = 1; if (retry) - if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) { + if (sh->qd_idx >= 0 && sh->pd_idx == i) + set_bit(R5_ReadError, &sh->dev[i].flags); + else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) { set_bit(R5_ReadError, &sh->dev[i].flags); clear_bit(R5_ReadNoMerge, &sh->dev[i].flags); } else