diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index b3856db8d6c2..ea1f1eb93c77 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -177,6 +177,13 @@ static inline void free_r1bio(r1bio_t *r1_bio) static inline void put_buf(r1bio_t *r1_bio) { conf_t *conf = mddev_to_conf(r1_bio->mddev); + int i; + + for (i=0; iraid_disks; i++) { + struct bio *bio = r1_bio->bios[i]; + if (bio->bi_end_io) + rdev_dec_pending(conf->mirrors[i].rdev, r1_bio->mddev); + } mempool_free(r1_bio, conf->r1buf_pool); @@ -1085,7 +1092,6 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error) conf->mirrors[r1_bio->read_disk].rdev); } else set_bit(R1BIO_Uptodate, &r1_bio->state); - rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev); reschedule_retry(r1_bio); return 0; } @@ -1116,7 +1122,6 @@ static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error) md_done_sync(mddev, r1_bio->sectors, uptodate); put_buf(r1_bio); } - rdev_dec_pending(conf->mirrors[mirror].rdev, mddev); return 0; } @@ -1153,10 +1158,14 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) atomic_set(&r1_bio->remaining, 1); for (i = 0; i < disks ; i++) { wbio = r1_bio->bios[i]; - if (wbio->bi_end_io != end_sync_write) + if (wbio->bi_end_io == NULL || + (wbio->bi_end_io == end_sync_read && + (i == r1_bio->read_disk || + !test_bit(MD_RECOVERY_SYNC, &mddev->recovery)))) continue; - atomic_inc(&conf->mirrors[i].rdev->nr_pending); + wbio->bi_rw = WRITE; + wbio->bi_end_io = end_sync_write; atomic_inc(&r1_bio->remaining); md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9); @@ -1388,14 +1397,13 @@ static int init_resync(conf_t *conf) static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster) { conf_t *conf = mddev_to_conf(mddev); - mirror_info_t *mirror; r1bio_t *r1_bio; struct bio *bio; sector_t max_sector, nr_sectors; - int disk; + int disk = -1; int i; - int wonly; - int write_targets = 0; + int wonly = -1; + int write_targets = 0, read_targets = 0; int sync_blocks; int still_degraded = 0; @@ -1447,44 +1455,24 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i conf->next_resync = sector_nr; - /* - * If reconstructing, and >1 working disc, - * could dedicate one to rebuild and others to - * service read requests .. - */ - disk = conf->last_used; - /* make sure disk is operational */ - wonly = disk; - while (conf->mirrors[disk].rdev == NULL || - !test_bit(In_sync, &conf->mirrors[disk].rdev->flags) || - test_bit(WriteMostly, &conf->mirrors[disk].rdev->flags) - ) { - if (conf->mirrors[disk].rdev && - test_bit(In_sync, &conf->mirrors[disk].rdev->flags)) - wonly = disk; - if (disk <= 0) - disk = conf->raid_disks; - disk--; - if (disk == conf->last_used) { - disk = wonly; - break; - } - } - conf->last_used = disk; - atomic_inc(&conf->mirrors[disk].rdev->nr_pending); - - - mirror = conf->mirrors + disk; - r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO); + rcu_read_lock(); + /* + * If we get a correctably read error during resync or recovery, + * we might want to read from a different device. So we + * flag all drives that could conceivably be read from for READ, + * and any others (which will be non-In_sync devices) for WRITE. + * If a read fails, we try reading from something else for which READ + * is OK. + */ r1_bio->mddev = mddev; r1_bio->sector = sector_nr; r1_bio->state = 0; set_bit(R1BIO_IsSync, &r1_bio->state); - r1_bio->read_disk = disk; for (i=0; i < conf->raid_disks; i++) { + mdk_rdev_t *rdev; bio = r1_bio->bios[i]; /* take from bio_init */ @@ -1499,35 +1487,49 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i bio->bi_end_io = NULL; bio->bi_private = NULL; - if (i == disk) { - bio->bi_rw = READ; - bio->bi_end_io = end_sync_read; - } else if (conf->mirrors[i].rdev == NULL || - test_bit(Faulty, &conf->mirrors[i].rdev->flags)) { + rdev = rcu_dereference(conf->mirrors[i].rdev); + if (rdev == NULL || + test_bit(Faulty, &rdev->flags)) { still_degraded = 1; continue; - } else if (!test_bit(In_sync, &conf->mirrors[i].rdev->flags) || - sector_nr + RESYNC_SECTORS > mddev->recovery_cp || - test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { + } else if (!test_bit(In_sync, &rdev->flags)) { bio->bi_rw = WRITE; bio->bi_end_io = end_sync_write; write_targets ++; - } else - /* no need to read or write here */ - continue; - bio->bi_sector = sector_nr + conf->mirrors[i].rdev->data_offset; - bio->bi_bdev = conf->mirrors[i].rdev->bdev; + } else { + /* may need to read from here */ + bio->bi_rw = READ; + bio->bi_end_io = end_sync_read; + if (test_bit(WriteMostly, &rdev->flags)) { + if (wonly < 0) + wonly = i; + } else { + if (disk < 0) + disk = i; + } + read_targets++; + } + atomic_inc(&rdev->nr_pending); + bio->bi_sector = sector_nr + rdev->data_offset; + bio->bi_bdev = rdev->bdev; bio->bi_private = r1_bio; } + rcu_read_unlock(); + if (disk < 0) + disk = wonly; + r1_bio->read_disk = disk; - if (write_targets == 0) { + if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && read_targets > 0) + /* extra read targets are also write targets */ + write_targets += read_targets-1; + + if (write_targets == 0 || read_targets == 0) { /* There is nowhere to write, so all non-sync * drives must be failed - so we are finished */ sector_t rv = max_sector - sector_nr; *skipped = 1; put_buf(r1_bio); - rdev_dec_pending(conf->mirrors[disk].rdev, mddev); return rv; } @@ -1578,10 +1580,10 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i sync_blocks -= (len>>9); } while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES); bio_full: - bio = r1_bio->bios[disk]; + bio = r1_bio->bios[r1_bio->read_disk]; r1_bio->sectors = nr_sectors; - md_sync_acct(mirror->rdev->bdev, nr_sectors); + md_sync_acct(conf->mirrors[r1_bio->read_disk].rdev->bdev, nr_sectors); generic_make_request(bio);