dm cache: improve discard support
Safely allow the discard blocksize to be larger than the cache blocksize by using the bio prison's range locking support. This also improves discard performance considerably because larger discards are issued to the dm-cache device. The discard blocksize was always intended to be greater than the cache blocksize. But until now it wasn't implemented safely. Also, by safely restoring the ability to have discard blocksize larger than cache blocksize we're able to significantly reduce the memory used for the cache's discard bitset. Before, with a small discard blocksize, the discard bitset could get quite large because its size is a function of the discard blocksize and the origin device's size. For example, previously, using a 32KB cache blocksize with a 40TB origin resulted in 1280MB of incore memory use for the discard bitset! Now, the discard blocksize is scaled up accordingly to ensure the discard bitset is capped at 2**14 bits, or 16KB. Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
This commit is contained in:
parent
08b184514f
commit
7ae34e7778
@ -310,6 +310,7 @@ struct dm_cache_migration {
|
||||
dm_cblock_t cblock;
|
||||
|
||||
bool err:1;
|
||||
bool discard:1;
|
||||
bool writeback:1;
|
||||
bool demote:1;
|
||||
bool promote:1;
|
||||
@ -433,12 +434,12 @@ static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cel
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
|
||||
static void build_key(dm_oblock_t oblock, struct dm_cell_key *key)
|
||||
static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key *key)
|
||||
{
|
||||
key->virtual = 0;
|
||||
key->dev = 0;
|
||||
key->block_begin = from_oblock(oblock);
|
||||
key->block_end = key->block_begin + 1ULL;
|
||||
key->block_begin = from_oblock(begin);
|
||||
key->block_end = from_oblock(end);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -448,15 +449,15 @@ static void build_key(dm_oblock_t oblock, struct dm_cell_key *key)
|
||||
*/
|
||||
typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell);
|
||||
|
||||
static int bio_detain(struct cache *cache, dm_oblock_t oblock,
|
||||
struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
|
||||
cell_free_fn free_fn, void *free_context,
|
||||
struct dm_bio_prison_cell **cell_result)
|
||||
static int bio_detain_range(struct cache *cache, dm_oblock_t oblock_begin, dm_oblock_t oblock_end,
|
||||
struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
|
||||
cell_free_fn free_fn, void *free_context,
|
||||
struct dm_bio_prison_cell **cell_result)
|
||||
{
|
||||
int r;
|
||||
struct dm_cell_key key;
|
||||
|
||||
build_key(oblock, &key);
|
||||
build_key(oblock_begin, oblock_end, &key);
|
||||
r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result);
|
||||
if (r)
|
||||
free_fn(free_context, cell_prealloc);
|
||||
@ -464,6 +465,16 @@ static int bio_detain(struct cache *cache, dm_oblock_t oblock,
|
||||
return r;
|
||||
}
|
||||
|
||||
static int bio_detain(struct cache *cache, dm_oblock_t oblock,
|
||||
struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
|
||||
cell_free_fn free_fn, void *free_context,
|
||||
struct dm_bio_prison_cell **cell_result)
|
||||
{
|
||||
dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
|
||||
return bio_detain_range(cache, oblock, end, bio,
|
||||
cell_prealloc, free_fn, free_context, cell_result);
|
||||
}
|
||||
|
||||
static int get_cell(struct cache *cache,
|
||||
dm_oblock_t oblock,
|
||||
struct prealloc *structs,
|
||||
@ -475,7 +486,7 @@ static int get_cell(struct cache *cache,
|
||||
|
||||
cell_prealloc = prealloc_get_cell(structs);
|
||||
|
||||
build_key(oblock, &key);
|
||||
build_key(oblock, to_oblock(from_oblock(oblock) + 1ULL), &key);
|
||||
r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result);
|
||||
if (r)
|
||||
prealloc_put_cell(structs, cell_prealloc);
|
||||
@ -525,25 +536,34 @@ static dm_block_t block_div(dm_block_t b, uint32_t n)
|
||||
return b;
|
||||
}
|
||||
|
||||
static dm_block_t oblocks_per_dblock(struct cache *cache)
|
||||
{
|
||||
dm_block_t oblocks = cache->discard_block_size;
|
||||
|
||||
if (block_size_is_power_of_two(cache))
|
||||
oblocks >>= cache->sectors_per_block_shift;
|
||||
else
|
||||
oblocks = block_div(oblocks, cache->sectors_per_block);
|
||||
|
||||
return oblocks;
|
||||
}
|
||||
|
||||
static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
|
||||
{
|
||||
uint32_t discard_blocks = cache->discard_block_size;
|
||||
dm_block_t b = from_oblock(oblock);
|
||||
return to_dblock(block_div(from_oblock(oblock),
|
||||
oblocks_per_dblock(cache)));
|
||||
}
|
||||
|
||||
if (!block_size_is_power_of_two(cache))
|
||||
discard_blocks = discard_blocks / cache->sectors_per_block;
|
||||
else
|
||||
discard_blocks >>= cache->sectors_per_block_shift;
|
||||
|
||||
b = block_div(b, discard_blocks);
|
||||
|
||||
return to_dblock(b);
|
||||
static dm_oblock_t dblock_to_oblock(struct cache *cache, dm_dblock_t dblock)
|
||||
{
|
||||
return to_oblock(from_dblock(dblock) * oblocks_per_dblock(cache));
|
||||
}
|
||||
|
||||
static void set_discard(struct cache *cache, dm_dblock_t b)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
|
||||
atomic_inc(&cache->stats.discard_count);
|
||||
|
||||
spin_lock_irqsave(&cache->lock, flags);
|
||||
@ -995,7 +1015,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
|
||||
wake_worker(cache);
|
||||
}
|
||||
|
||||
static void issue_copy_real(struct dm_cache_migration *mg)
|
||||
static void issue_copy(struct dm_cache_migration *mg)
|
||||
{
|
||||
int r;
|
||||
struct dm_io_region o_region, c_region;
|
||||
@ -1074,11 +1094,46 @@ static void avoid_copy(struct dm_cache_migration *mg)
|
||||
migration_success_pre_commit(mg);
|
||||
}
|
||||
|
||||
static void issue_copy(struct dm_cache_migration *mg)
|
||||
static void calc_discard_block_range(struct cache *cache, struct bio *bio,
|
||||
dm_dblock_t *b, dm_dblock_t *e)
|
||||
{
|
||||
sector_t sb = bio->bi_iter.bi_sector;
|
||||
sector_t se = bio_end_sector(bio);
|
||||
|
||||
*b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size));
|
||||
|
||||
if (se - sb < cache->discard_block_size)
|
||||
*e = *b;
|
||||
else
|
||||
*e = to_dblock(block_div(se, cache->discard_block_size));
|
||||
}
|
||||
|
||||
static void issue_discard(struct dm_cache_migration *mg)
|
||||
{
|
||||
dm_dblock_t b, e;
|
||||
struct bio *bio = mg->new_ocell->holder;
|
||||
|
||||
calc_discard_block_range(mg->cache, bio, &b, &e);
|
||||
while (b != e) {
|
||||
set_discard(mg->cache, b);
|
||||
b = to_dblock(from_dblock(b) + 1);
|
||||
}
|
||||
|
||||
bio_endio(bio, 0);
|
||||
cell_defer(mg->cache, mg->new_ocell, false);
|
||||
free_migration(mg);
|
||||
}
|
||||
|
||||
static void issue_copy_or_discard(struct dm_cache_migration *mg)
|
||||
{
|
||||
bool avoid;
|
||||
struct cache *cache = mg->cache;
|
||||
|
||||
if (mg->discard) {
|
||||
issue_discard(mg);
|
||||
return;
|
||||
}
|
||||
|
||||
if (mg->writeback || mg->demote)
|
||||
avoid = !is_dirty(cache, mg->cblock) ||
|
||||
is_discarded_oblock(cache, mg->old_oblock);
|
||||
@ -1093,7 +1148,7 @@ static void issue_copy(struct dm_cache_migration *mg)
|
||||
}
|
||||
}
|
||||
|
||||
avoid ? avoid_copy(mg) : issue_copy_real(mg);
|
||||
avoid ? avoid_copy(mg) : issue_copy(mg);
|
||||
}
|
||||
|
||||
static void complete_migration(struct dm_cache_migration *mg)
|
||||
@ -1178,6 +1233,7 @@ static void promote(struct cache *cache, struct prealloc *structs,
|
||||
struct dm_cache_migration *mg = prealloc_get_migration(structs);
|
||||
|
||||
mg->err = false;
|
||||
mg->discard = false;
|
||||
mg->writeback = false;
|
||||
mg->demote = false;
|
||||
mg->promote = true;
|
||||
@ -1201,6 +1257,7 @@ static void writeback(struct cache *cache, struct prealloc *structs,
|
||||
struct dm_cache_migration *mg = prealloc_get_migration(structs);
|
||||
|
||||
mg->err = false;
|
||||
mg->discard = false;
|
||||
mg->writeback = true;
|
||||
mg->demote = false;
|
||||
mg->promote = false;
|
||||
@ -1226,6 +1283,7 @@ static void demote_then_promote(struct cache *cache, struct prealloc *structs,
|
||||
struct dm_cache_migration *mg = prealloc_get_migration(structs);
|
||||
|
||||
mg->err = false;
|
||||
mg->discard = false;
|
||||
mg->writeback = false;
|
||||
mg->demote = true;
|
||||
mg->promote = true;
|
||||
@ -1254,6 +1312,7 @@ static void invalidate(struct cache *cache, struct prealloc *structs,
|
||||
struct dm_cache_migration *mg = prealloc_get_migration(structs);
|
||||
|
||||
mg->err = false;
|
||||
mg->discard = false;
|
||||
mg->writeback = false;
|
||||
mg->demote = true;
|
||||
mg->promote = false;
|
||||
@ -1270,6 +1329,26 @@ static void invalidate(struct cache *cache, struct prealloc *structs,
|
||||
quiesce_migration(mg);
|
||||
}
|
||||
|
||||
static void discard(struct cache *cache, struct prealloc *structs,
|
||||
struct dm_bio_prison_cell *cell)
|
||||
{
|
||||
struct dm_cache_migration *mg = prealloc_get_migration(structs);
|
||||
|
||||
mg->err = false;
|
||||
mg->discard = true;
|
||||
mg->writeback = false;
|
||||
mg->demote = false;
|
||||
mg->promote = false;
|
||||
mg->requeue_holder = false;
|
||||
mg->invalidate = false;
|
||||
mg->cache = cache;
|
||||
mg->old_ocell = NULL;
|
||||
mg->new_ocell = cell;
|
||||
mg->start_jiffies = jiffies;
|
||||
|
||||
quiesce_migration(mg);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------
|
||||
* bio processing
|
||||
*--------------------------------------------------------------*/
|
||||
@ -1303,31 +1382,27 @@ static void process_flush_bio(struct cache *cache, struct bio *bio)
|
||||
issue(cache, bio);
|
||||
}
|
||||
|
||||
/*
|
||||
* People generally discard large parts of a device, eg, the whole device
|
||||
* when formatting. Splitting these large discards up into cache block
|
||||
* sized ios and then quiescing (always neccessary for discard) takes too
|
||||
* long.
|
||||
*
|
||||
* We keep it simple, and allow any size of discard to come in, and just
|
||||
* mark off blocks on the discard bitset. No passdown occurs!
|
||||
*
|
||||
* To implement passdown we need to change the bio_prison such that a cell
|
||||
* can have a key that spans many blocks.
|
||||
*/
|
||||
static void process_discard_bio(struct cache *cache, struct bio *bio)
|
||||
static void process_discard_bio(struct cache *cache, struct prealloc *structs,
|
||||
struct bio *bio)
|
||||
{
|
||||
dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector,
|
||||
cache->discard_block_size);
|
||||
dm_block_t end_block = bio_end_sector(bio);
|
||||
dm_block_t b;
|
||||
int r;
|
||||
dm_dblock_t b, e;
|
||||
struct dm_bio_prison_cell *cell_prealloc, *new_ocell;
|
||||
|
||||
end_block = block_div(end_block, cache->discard_block_size);
|
||||
calc_discard_block_range(cache, bio, &b, &e);
|
||||
if (b == e) {
|
||||
bio_endio(bio, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
for (b = start_block; b < end_block; b++)
|
||||
set_discard(cache, to_dblock(b));
|
||||
cell_prealloc = prealloc_get_cell(structs);
|
||||
r = bio_detain_range(cache, dblock_to_oblock(cache, b), dblock_to_oblock(cache, e), bio, cell_prealloc,
|
||||
(cell_free_fn) prealloc_put_cell,
|
||||
structs, &new_ocell);
|
||||
if (r > 0)
|
||||
return;
|
||||
|
||||
bio_endio(bio, 0);
|
||||
discard(cache, structs, new_ocell);
|
||||
}
|
||||
|
||||
static bool spare_migration_bandwidth(struct cache *cache)
|
||||
@ -1517,7 +1592,7 @@ static void process_deferred_bios(struct cache *cache)
|
||||
if (bio->bi_rw & REQ_FLUSH)
|
||||
process_flush_bio(cache, bio);
|
||||
else if (bio->bi_rw & REQ_DISCARD)
|
||||
process_discard_bio(cache, bio);
|
||||
process_discard_bio(cache, &structs, bio);
|
||||
else
|
||||
process_bio(cache, &structs, bio);
|
||||
}
|
||||
@ -1732,7 +1807,7 @@ static void do_worker(struct work_struct *ws)
|
||||
process_invalidation_requests(cache);
|
||||
}
|
||||
|
||||
process_migrations(cache, &cache->quiesced_migrations, issue_copy);
|
||||
process_migrations(cache, &cache->quiesced_migrations, issue_copy_or_discard);
|
||||
process_migrations(cache, &cache->completed_migrations, complete_migration);
|
||||
|
||||
if (commit_if_needed(cache)) {
|
||||
@ -3130,7 +3205,8 @@ static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
|
||||
/*
|
||||
* FIXME: these limits may be incompatible with the cache device
|
||||
*/
|
||||
limits->max_discard_sectors = cache->discard_block_size * 1024;
|
||||
limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
|
||||
cache->origin_sectors);
|
||||
limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
|
||||
}
|
||||
|
||||
@ -3155,7 +3231,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
|
||||
|
||||
static struct target_type cache_target = {
|
||||
.name = "cache",
|
||||
.version = {1, 5, 0},
|
||||
.version = {1, 6, 0},
|
||||
.module = THIS_MODULE,
|
||||
.ctr = cache_ctr,
|
||||
.dtr = cache_dtr,
|
||||
|
Loading…
Reference in New Issue
Block a user