Btrfs: add fragment=* debug mount option
In tracking down these weird bitmap problems it was helpful to artificially create an extremely fragmented file system. These mount options let us either fragment data or metadata or both. With these options I could reproduce all sorts of weird latencies and hangs that occur under extreme fragmentation and get them fixed. Thanks, Signed-off-by: Josef Bacik <jbacik@fb.com> Signed-off-by: Chris Mason <clm@fb.com>
This commit is contained in:
parent
d9ee522ba3
commit
d0bd456074
|
@ -2145,6 +2145,8 @@ struct btrfs_ioctl_defrag_range_args {
|
|||
#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
|
||||
#define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22)
|
||||
#define BTRFS_MOUNT_RESCAN_UUID_TREE (1 << 23)
|
||||
#define BTRFS_MOUNT_FRAGMENT_DATA (1 << 24)
|
||||
#define BTRFS_MOUNT_FRAGMENT_METADATA (1 << 25)
|
||||
|
||||
#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
|
||||
#define BTRFS_DEFAULT_MAX_INLINE (8192)
|
||||
|
@ -2169,6 +2171,18 @@ struct btrfs_ioctl_defrag_range_args {
|
|||
btrfs_clear_opt(root->fs_info->mount_opt, opt); \
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
static inline int
|
||||
btrfs_should_fragment_free_space(struct btrfs_root *root,
|
||||
struct btrfs_block_group_cache *block_group)
|
||||
{
|
||||
return (btrfs_test_opt(root, FRAGMENT_METADATA) &&
|
||||
block_group->flags & BTRFS_BLOCK_GROUP_METADATA) ||
|
||||
(btrfs_test_opt(root, FRAGMENT_DATA) &&
|
||||
block_group->flags & BTRFS_BLOCK_GROUP_DATA);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Requests for changes that need to be done during transaction commit.
|
||||
*
|
||||
|
|
|
@ -332,6 +332,27 @@ static void put_caching_control(struct btrfs_caching_control *ctl)
|
|||
kfree(ctl);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
static void fragment_free_space(struct btrfs_root *root,
|
||||
struct btrfs_block_group_cache *block_group)
|
||||
{
|
||||
u64 start = block_group->key.objectid;
|
||||
u64 len = block_group->key.offset;
|
||||
u64 chunk = block_group->flags & BTRFS_BLOCK_GROUP_METADATA ?
|
||||
root->nodesize : root->sectorsize;
|
||||
u64 step = chunk << 1;
|
||||
|
||||
while (len > chunk) {
|
||||
btrfs_remove_free_space(block_group, start, chunk);
|
||||
start += step;
|
||||
if (len < step)
|
||||
len = 0;
|
||||
else
|
||||
len -= step;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* this is only called by cache_block_group, since we could have freed extents
|
||||
* we need to check the pinned_extents for any extents that can't be used yet
|
||||
|
@ -388,6 +409,7 @@ static noinline void caching_thread(struct btrfs_work *work)
|
|||
u64 last = 0;
|
||||
u32 nritems;
|
||||
int ret = -ENOMEM;
|
||||
bool wakeup = true;
|
||||
|
||||
caching_ctl = container_of(work, struct btrfs_caching_control, work);
|
||||
block_group = caching_ctl->block_group;
|
||||
|
@ -400,6 +422,15 @@ static noinline void caching_thread(struct btrfs_work *work)
|
|||
|
||||
last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
/*
|
||||
* If we're fragmenting we don't want to make anybody think we can
|
||||
* allocate from this block group until we've had a chance to fragment
|
||||
* the free space.
|
||||
*/
|
||||
if (btrfs_should_fragment_free_space(extent_root, block_group))
|
||||
wakeup = false;
|
||||
#endif
|
||||
/*
|
||||
* We don't want to deadlock with somebody trying to allocate a new
|
||||
* extent for the extent root while also trying to search the extent
|
||||
|
@ -441,7 +472,8 @@ static noinline void caching_thread(struct btrfs_work *work)
|
|||
|
||||
if (need_resched() ||
|
||||
rwsem_is_contended(&fs_info->commit_root_sem)) {
|
||||
caching_ctl->progress = last;
|
||||
if (wakeup)
|
||||
caching_ctl->progress = last;
|
||||
btrfs_release_path(path);
|
||||
up_read(&fs_info->commit_root_sem);
|
||||
mutex_unlock(&caching_ctl->mutex);
|
||||
|
@ -464,7 +496,8 @@ static noinline void caching_thread(struct btrfs_work *work)
|
|||
key.offset = 0;
|
||||
key.type = BTRFS_EXTENT_ITEM_KEY;
|
||||
|
||||
caching_ctl->progress = last;
|
||||
if (wakeup)
|
||||
caching_ctl->progress = last;
|
||||
btrfs_release_path(path);
|
||||
goto next;
|
||||
}
|
||||
|
@ -491,7 +524,8 @@ static noinline void caching_thread(struct btrfs_work *work)
|
|||
|
||||
if (total_found > (1024 * 1024 * 2)) {
|
||||
total_found = 0;
|
||||
wake_up(&caching_ctl->wait);
|
||||
if (wakeup)
|
||||
wake_up(&caching_ctl->wait);
|
||||
}
|
||||
}
|
||||
path->slots[0]++;
|
||||
|
@ -501,13 +535,27 @@ static noinline void caching_thread(struct btrfs_work *work)
|
|||
total_found += add_new_free_space(block_group, fs_info, last,
|
||||
block_group->key.objectid +
|
||||
block_group->key.offset);
|
||||
caching_ctl->progress = (u64)-1;
|
||||
|
||||
spin_lock(&block_group->lock);
|
||||
block_group->caching_ctl = NULL;
|
||||
block_group->cached = BTRFS_CACHE_FINISHED;
|
||||
spin_unlock(&block_group->lock);
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
if (btrfs_should_fragment_free_space(extent_root, block_group)) {
|
||||
u64 bytes_used;
|
||||
|
||||
spin_lock(&block_group->space_info->lock);
|
||||
spin_lock(&block_group->lock);
|
||||
bytes_used = block_group->key.offset -
|
||||
btrfs_block_group_used(&block_group->item);
|
||||
block_group->space_info->bytes_used += bytes_used >> 1;
|
||||
spin_unlock(&block_group->lock);
|
||||
spin_unlock(&block_group->space_info->lock);
|
||||
fragment_free_space(extent_root, block_group);
|
||||
}
|
||||
#endif
|
||||
|
||||
caching_ctl->progress = (u64)-1;
|
||||
err:
|
||||
btrfs_free_path(path);
|
||||
up_read(&fs_info->commit_root_sem);
|
||||
|
@ -607,6 +655,22 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
|
|||
}
|
||||
}
|
||||
spin_unlock(&cache->lock);
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
if (ret == 1 &&
|
||||
btrfs_should_fragment_free_space(fs_info->extent_root,
|
||||
cache)) {
|
||||
u64 bytes_used;
|
||||
|
||||
spin_lock(&cache->space_info->lock);
|
||||
spin_lock(&cache->lock);
|
||||
bytes_used = cache->key.offset -
|
||||
btrfs_block_group_used(&cache->item);
|
||||
cache->space_info->bytes_used += bytes_used >> 1;
|
||||
spin_unlock(&cache->lock);
|
||||
spin_unlock(&cache->space_info->lock);
|
||||
fragment_free_space(fs_info->extent_root, cache);
|
||||
}
|
||||
#endif
|
||||
mutex_unlock(&caching_ctl->mutex);
|
||||
|
||||
wake_up(&caching_ctl->wait);
|
||||
|
@ -9624,6 +9688,14 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
|
|||
|
||||
free_excluded_extents(root, cache);
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
if (btrfs_should_fragment_free_space(root, cache)) {
|
||||
u64 new_bytes_used = size - bytes_used;
|
||||
|
||||
bytes_used += new_bytes_used >> 1;
|
||||
fragment_free_space(root, cache);
|
||||
}
|
||||
#endif
|
||||
/*
|
||||
* Call to ensure the corresponding space_info object is created and
|
||||
* assigned to our block group, but don't update its counters just yet.
|
||||
|
|
|
@ -1951,12 +1951,19 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
|
|||
struct btrfs_free_space *info)
|
||||
{
|
||||
struct btrfs_block_group_cache *block_group = ctl->private;
|
||||
bool forced = false;
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
if (btrfs_should_fragment_free_space(block_group->fs_info->extent_root,
|
||||
block_group))
|
||||
forced = true;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If we are below the extents threshold then we can add this as an
|
||||
* extent, and don't have to deal with the bitmap
|
||||
*/
|
||||
if (ctl->free_extents < ctl->extents_thresh) {
|
||||
if (!forced && ctl->free_extents < ctl->extents_thresh) {
|
||||
/*
|
||||
* If this block group has some small extents we don't want to
|
||||
* use up all of our free slots in the cache with them, we want
|
||||
|
|
|
@ -303,6 +303,9 @@ enum {
|
|||
Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
|
||||
Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
|
||||
Opt_datasum, Opt_treelog, Opt_noinode_cache,
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
|
||||
#endif
|
||||
Opt_err,
|
||||
};
|
||||
|
||||
|
@ -355,6 +358,11 @@ static match_table_t tokens = {
|
|||
{Opt_rescan_uuid_tree, "rescan_uuid_tree"},
|
||||
{Opt_fatal_errors, "fatal_errors=%s"},
|
||||
{Opt_commit_interval, "commit=%d"},
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
{Opt_fragment_data, "fragment=data"},
|
||||
{Opt_fragment_metadata, "fragment=metadata"},
|
||||
{Opt_fragment_all, "fragment=all"},
|
||||
#endif
|
||||
{Opt_err, NULL},
|
||||
};
|
||||
|
||||
|
@ -721,6 +729,22 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
|
|||
info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
|
||||
}
|
||||
break;
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
case Opt_fragment_all:
|
||||
btrfs_info(root->fs_info, "fragmenting all space");
|
||||
btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
|
||||
btrfs_set_opt(info->mount_opt, FRAGMENT_METADATA);
|
||||
break;
|
||||
case Opt_fragment_metadata:
|
||||
btrfs_info(root->fs_info, "fragmenting metadata");
|
||||
btrfs_set_opt(info->mount_opt,
|
||||
FRAGMENT_METADATA);
|
||||
break;
|
||||
case Opt_fragment_data:
|
||||
btrfs_info(root->fs_info, "fragmenting data");
|
||||
btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
|
||||
break;
|
||||
#endif
|
||||
case Opt_err:
|
||||
btrfs_info(root->fs_info, "unrecognized mount option '%s'", p);
|
||||
ret = -EINVAL;
|
||||
|
@ -1172,6 +1196,12 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
|
|||
seq_puts(seq, ",fatal_errors=panic");
|
||||
if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
|
||||
seq_printf(seq, ",commit=%d", info->commit_interval);
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
if (btrfs_test_opt(root, FRAGMENT_DATA))
|
||||
seq_puts(seq, ",fragment=data");
|
||||
if (btrfs_test_opt(root, FRAGMENT_METADATA))
|
||||
seq_puts(seq, ",fragment=metadata");
|
||||
#endif
|
||||
seq_printf(seq, ",subvolid=%llu",
|
||||
BTRFS_I(d_inode(dentry))->root->root_key.objectid);
|
||||
seq_puts(seq, ",subvol=");
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include <linux/slab.h>
|
||||
#include "btrfs-tests.h"
|
||||
#include "../ctree.h"
|
||||
#include "../disk-io.h"
|
||||
#include "../free-space-cache.h"
|
||||
|
||||
#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
|
||||
|
@ -35,6 +36,12 @@ static struct btrfs_block_group_cache *init_test_block_group(void)
|
|||
kfree(cache);
|
||||
return NULL;
|
||||
}
|
||||
cache->fs_info = btrfs_alloc_dummy_fs_info();
|
||||
if (!cache->fs_info) {
|
||||
kfree(cache->free_space_ctl);
|
||||
kfree(cache);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
cache->key.objectid = 0;
|
||||
cache->key.offset = 1024 * 1024 * 1024;
|
||||
|
@ -879,7 +886,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
|
|||
int btrfs_test_free_space_cache(void)
|
||||
{
|
||||
struct btrfs_block_group_cache *cache;
|
||||
int ret;
|
||||
struct btrfs_root *root = NULL;
|
||||
int ret = -ENOMEM;
|
||||
|
||||
test_msg("Running btrfs free space cache tests\n");
|
||||
|
||||
|
@ -889,6 +897,17 @@ int btrfs_test_free_space_cache(void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
root = btrfs_alloc_dummy_root();
|
||||
if (!root)
|
||||
goto out;
|
||||
|
||||
root->fs_info = btrfs_alloc_dummy_fs_info();
|
||||
if (!root->fs_info)
|
||||
goto out;
|
||||
|
||||
root->fs_info->extent_root = root;
|
||||
cache->fs_info = root->fs_info;
|
||||
|
||||
ret = test_extents(cache);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
@ -904,6 +923,7 @@ int btrfs_test_free_space_cache(void)
|
|||
__btrfs_remove_free_space_cache(cache->free_space_ctl);
|
||||
kfree(cache->free_space_ctl);
|
||||
kfree(cache);
|
||||
btrfs_free_dummy_root(root);
|
||||
test_msg("Free space cache tests finished\n");
|
||||
return ret;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user