forked from luck/tmp_suning_uos_patched
Btrfs: add and improve comments
This improves the comments at the top of many functions. It didn't dive into the guts of functions because I was trying to avoid merging problems with the new allocator and back reference work. extent-tree.c and volumes.c were both skipped, and there is definitely more work todo in cleaning and commenting the code. Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
parent
9a5e1ea1e1
commit
d352ac6814
|
@ -4,7 +4,7 @@ ifneq ($(KERNELRELEASE),)
|
||||||
obj-m := btrfs.o
|
obj-m := btrfs.o
|
||||||
btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
|
btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
|
||||||
file-item.o inode-item.o inode-map.o disk-io.o \
|
file-item.o inode-item.o inode-map.o disk-io.o \
|
||||||
transaction.o bit-radix.o inode.o file.o tree-defrag.o \
|
transaction.o inode.o file.o tree-defrag.o \
|
||||||
extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
|
extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
|
||||||
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
|
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
|
||||||
ref-cache.o export.o tree-log.o acl.o free-space-cache.o
|
ref-cache.o export.o tree-log.o acl.o free-space-cache.o
|
||||||
|
|
|
@ -1,20 +0,0 @@
|
||||||
* cleanup, add more error checking, get rid of BUG_ONs
|
|
||||||
* Fix ENOSPC handling
|
|
||||||
* Make allocator smarter
|
|
||||||
* add a block group to struct inode
|
|
||||||
* Do actual block accounting
|
|
||||||
* Check compat and incompat flags on the inode
|
|
||||||
* Get rid of struct ctree_path, limiting tree levels held at one time
|
|
||||||
* Add generation number to key pointer in nodes
|
|
||||||
* Add generation number to inode
|
|
||||||
* forbid cross subvolume renames and hardlinks
|
|
||||||
* Release
|
|
||||||
* Do real tree locking
|
|
||||||
* Add extent mirroring (backup copies of blocks)
|
|
||||||
* Add fancy interface to get access to incremental backups
|
|
||||||
* Add fancy striped extents to make big reads faster
|
|
||||||
* Use relocation to try and fix write errors
|
|
||||||
* Make allocator much smarter
|
|
||||||
* xattrs (directory streams for regular files)
|
|
||||||
* Scrub & defrag
|
|
||||||
|
|
|
@ -231,17 +231,25 @@ static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* if we pick a busy task, move the task to the end of the list.
|
* if we pick a busy task, move the task to the end of the list.
|
||||||
* hopefully this will keep things somewhat evenly balanced
|
* hopefully this will keep things somewhat evenly balanced.
|
||||||
|
* Do the move in batches based on the sequence number. This groups
|
||||||
|
* requests submitted at roughly the same time onto the same worker.
|
||||||
*/
|
*/
|
||||||
next = workers->worker_list.next;
|
next = workers->worker_list.next;
|
||||||
worker = list_entry(next, struct btrfs_worker_thread, worker_list);
|
worker = list_entry(next, struct btrfs_worker_thread, worker_list);
|
||||||
atomic_inc(&worker->num_pending);
|
atomic_inc(&worker->num_pending);
|
||||||
worker->sequence++;
|
worker->sequence++;
|
||||||
|
|
||||||
if (worker->sequence % workers->idle_thresh == 0)
|
if (worker->sequence % workers->idle_thresh == 0)
|
||||||
list_move_tail(next, &workers->worker_list);
|
list_move_tail(next, &workers->worker_list);
|
||||||
return worker;
|
return worker;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* selects a worker thread to take the next job. This will either find
|
||||||
|
* an idle worker, start a new worker up to the max count, or just return
|
||||||
|
* one of the existing busy workers.
|
||||||
|
*/
|
||||||
static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
|
static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
|
||||||
{
|
{
|
||||||
struct btrfs_worker_thread *worker;
|
struct btrfs_worker_thread *worker;
|
||||||
|
|
|
@ -63,14 +63,17 @@ struct btrfs_workers {
|
||||||
/* once a worker has this many requests or fewer, it is idle */
|
/* once a worker has this many requests or fewer, it is idle */
|
||||||
int idle_thresh;
|
int idle_thresh;
|
||||||
|
|
||||||
/* list with all the work threads */
|
/* list with all the work threads. The workers on the idle thread
|
||||||
|
* may be actively servicing jobs, but they haven't yet hit the
|
||||||
|
* idle thresh limit above.
|
||||||
|
*/
|
||||||
struct list_head worker_list;
|
struct list_head worker_list;
|
||||||
struct list_head idle_list;
|
struct list_head idle_list;
|
||||||
|
|
||||||
/* lock for finding the next worker thread to queue on */
|
/* lock for finding the next worker thread to queue on */
|
||||||
spinlock_t lock;
|
spinlock_t lock;
|
||||||
|
|
||||||
/* extra name for this worker */
|
/* extra name for this worker, used for current->name */
|
||||||
char *name;
|
char *name;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -1,130 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
|
||||||
*
|
|
||||||
* This program is free software; you can redistribute it and/or
|
|
||||||
* modify it under the terms of the GNU General Public
|
|
||||||
* License v2 as published by the Free Software Foundation.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
||||||
* General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public
|
|
||||||
* License along with this program; if not, write to the
|
|
||||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
||||||
* Boston, MA 021110-1307, USA.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "bit-radix.h"
|
|
||||||
|
|
||||||
#define BIT_ARRAY_BYTES 256
|
|
||||||
#define BIT_RADIX_BITS_PER_ARRAY ((BIT_ARRAY_BYTES - sizeof(unsigned long)) * 8)
|
|
||||||
|
|
||||||
extern struct kmem_cache *btrfs_bit_radix_cachep;
|
|
||||||
int set_radix_bit(struct radix_tree_root *radix, unsigned long bit)
|
|
||||||
{
|
|
||||||
unsigned long *bits;
|
|
||||||
unsigned long slot;
|
|
||||||
int bit_slot;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
slot = bit / BIT_RADIX_BITS_PER_ARRAY;
|
|
||||||
bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY;
|
|
||||||
|
|
||||||
bits = radix_tree_lookup(radix, slot);
|
|
||||||
if (!bits) {
|
|
||||||
bits = kmem_cache_alloc(btrfs_bit_radix_cachep, GFP_NOFS);
|
|
||||||
if (!bits)
|
|
||||||
return -ENOMEM;
|
|
||||||
memset(bits + 1, 0, BIT_ARRAY_BYTES - sizeof(unsigned long));
|
|
||||||
bits[0] = slot;
|
|
||||||
ret = radix_tree_insert(radix, slot, bits);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
ret = test_and_set_bit(bit_slot, bits + 1);
|
|
||||||
if (ret < 0)
|
|
||||||
ret = 1;
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
int test_radix_bit(struct radix_tree_root *radix, unsigned long bit)
|
|
||||||
{
|
|
||||||
unsigned long *bits;
|
|
||||||
unsigned long slot;
|
|
||||||
int bit_slot;
|
|
||||||
|
|
||||||
slot = bit / BIT_RADIX_BITS_PER_ARRAY;
|
|
||||||
bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY;
|
|
||||||
|
|
||||||
bits = radix_tree_lookup(radix, slot);
|
|
||||||
if (!bits)
|
|
||||||
return 0;
|
|
||||||
return test_bit(bit_slot, bits + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit)
|
|
||||||
{
|
|
||||||
unsigned long *bits;
|
|
||||||
unsigned long slot;
|
|
||||||
int bit_slot;
|
|
||||||
int i;
|
|
||||||
int empty = 1;
|
|
||||||
|
|
||||||
slot = bit / BIT_RADIX_BITS_PER_ARRAY;
|
|
||||||
bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY;
|
|
||||||
|
|
||||||
bits = radix_tree_lookup(radix, slot);
|
|
||||||
if (!bits)
|
|
||||||
return 0;
|
|
||||||
clear_bit(bit_slot, bits + 1);
|
|
||||||
for (i = 1; i < BIT_ARRAY_BYTES / sizeof(unsigned long); i++) {
|
|
||||||
if (bits[i]) {
|
|
||||||
empty = 0;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (empty) {
|
|
||||||
bits = radix_tree_delete(radix, slot);
|
|
||||||
BUG_ON(!bits);
|
|
||||||
kmem_cache_free(btrfs_bit_radix_cachep, bits);
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits,
|
|
||||||
unsigned long start, int nr)
|
|
||||||
{
|
|
||||||
unsigned long *bits;
|
|
||||||
unsigned long *gang[4];
|
|
||||||
int found;
|
|
||||||
int ret;
|
|
||||||
int i;
|
|
||||||
int total_found = 0;
|
|
||||||
unsigned long slot;
|
|
||||||
|
|
||||||
slot = start / BIT_RADIX_BITS_PER_ARRAY;
|
|
||||||
ret = radix_tree_gang_lookup(radix, (void **)gang, slot,
|
|
||||||
ARRAY_SIZE(gang));
|
|
||||||
found = start % BIT_RADIX_BITS_PER_ARRAY;
|
|
||||||
for (i = 0; i < ret && nr > 0; i++) {
|
|
||||||
bits = gang[i];
|
|
||||||
while(nr > 0) {
|
|
||||||
found = find_next_bit(bits + 1,
|
|
||||||
BIT_RADIX_BITS_PER_ARRAY,
|
|
||||||
found);
|
|
||||||
if (found < BIT_RADIX_BITS_PER_ARRAY) {
|
|
||||||
*retbits = bits[0] *
|
|
||||||
BIT_RADIX_BITS_PER_ARRAY + found;
|
|
||||||
retbits++;
|
|
||||||
nr--;
|
|
||||||
total_found++;
|
|
||||||
found++;
|
|
||||||
} else
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
found = 0;
|
|
||||||
}
|
|
||||||
return total_found;
|
|
||||||
}
|
|
|
@ -1,33 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
|
||||||
*
|
|
||||||
* This program is free software; you can redistribute it and/or
|
|
||||||
* modify it under the terms of the GNU General Public
|
|
||||||
* License v2 as published by the Free Software Foundation.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
||||||
* General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public
|
|
||||||
* License along with this program; if not, write to the
|
|
||||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
||||||
* Boston, MA 021110-1307, USA.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __BIT_RADIX__
|
|
||||||
#define __BIT_RADIX__
|
|
||||||
#include <linux/radix-tree.h>
|
|
||||||
|
|
||||||
int set_radix_bit(struct radix_tree_root *radix, unsigned long bit);
|
|
||||||
int test_radix_bit(struct radix_tree_root *radix, unsigned long bit);
|
|
||||||
int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit);
|
|
||||||
int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits,
|
|
||||||
unsigned long start, int nr);
|
|
||||||
|
|
||||||
static inline void init_bit_radix(struct radix_tree_root *radix)
|
|
||||||
{
|
|
||||||
INIT_RADIX_TREE(radix, GFP_NOFS);
|
|
||||||
}
|
|
||||||
#endif
|
|
|
@ -25,27 +25,58 @@
|
||||||
|
|
||||||
/* in memory btrfs inode */
|
/* in memory btrfs inode */
|
||||||
struct btrfs_inode {
|
struct btrfs_inode {
|
||||||
|
/* which subvolume this inode belongs to */
|
||||||
struct btrfs_root *root;
|
struct btrfs_root *root;
|
||||||
|
|
||||||
|
/* the block group preferred for allocations. This pointer is buggy
|
||||||
|
* and needs to be replaced with a bytenr instead
|
||||||
|
*/
|
||||||
struct btrfs_block_group_cache *block_group;
|
struct btrfs_block_group_cache *block_group;
|
||||||
|
|
||||||
|
/* key used to find this inode on disk. This is used by the code
|
||||||
|
* to read in roots of subvolumes
|
||||||
|
*/
|
||||||
struct btrfs_key location;
|
struct btrfs_key location;
|
||||||
|
|
||||||
|
/* the extent_tree has caches of all the extent mappings to disk */
|
||||||
struct extent_map_tree extent_tree;
|
struct extent_map_tree extent_tree;
|
||||||
|
|
||||||
|
/* the io_tree does range state (DIRTY, LOCKED etc) */
|
||||||
struct extent_io_tree io_tree;
|
struct extent_io_tree io_tree;
|
||||||
|
|
||||||
|
/* special utility tree used to record which mirrors have already been
|
||||||
|
* tried when checksums fail for a given block
|
||||||
|
*/
|
||||||
struct extent_io_tree io_failure_tree;
|
struct extent_io_tree io_failure_tree;
|
||||||
|
|
||||||
|
/* held while inserting checksums to avoid races */
|
||||||
struct mutex csum_mutex;
|
struct mutex csum_mutex;
|
||||||
|
|
||||||
|
/* held while inesrting or deleting extents from files */
|
||||||
struct mutex extent_mutex;
|
struct mutex extent_mutex;
|
||||||
|
|
||||||
|
/* held while logging the inode in tree-log.c */
|
||||||
struct mutex log_mutex;
|
struct mutex log_mutex;
|
||||||
struct inode vfs_inode;
|
|
||||||
|
/* used to order data wrt metadata */
|
||||||
struct btrfs_ordered_inode_tree ordered_tree;
|
struct btrfs_ordered_inode_tree ordered_tree;
|
||||||
|
|
||||||
|
/* standard acl pointers */
|
||||||
struct posix_acl *i_acl;
|
struct posix_acl *i_acl;
|
||||||
struct posix_acl *i_default_acl;
|
struct posix_acl *i_default_acl;
|
||||||
|
|
||||||
/* for keeping track of orphaned inodes */
|
/* for keeping track of orphaned inodes */
|
||||||
struct list_head i_orphan;
|
struct list_head i_orphan;
|
||||||
|
|
||||||
|
/* list of all the delalloc inodes in the FS. There are times we need
|
||||||
|
* to write all the delalloc pages to disk, and this list is used
|
||||||
|
* to walk them all.
|
||||||
|
*/
|
||||||
struct list_head delalloc_inodes;
|
struct list_head delalloc_inodes;
|
||||||
|
|
||||||
/* full 64 bit generation number */
|
/* full 64 bit generation number, struct vfs_inode doesn't have a big
|
||||||
|
* enough field for this.
|
||||||
|
*/
|
||||||
u64 generation;
|
u64 generation;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -57,10 +88,25 @@ struct btrfs_inode {
|
||||||
*/
|
*/
|
||||||
u64 logged_trans;
|
u64 logged_trans;
|
||||||
|
|
||||||
/* trans that last made a change that should be fully fsync'd */
|
/*
|
||||||
|
* trans that last made a change that should be fully fsync'd. This
|
||||||
|
* gets reset to zero each time the inode is logged
|
||||||
|
*/
|
||||||
u64 log_dirty_trans;
|
u64 log_dirty_trans;
|
||||||
|
|
||||||
|
/* total number of bytes pending delalloc, used by stat to calc the
|
||||||
|
* real block usage of the file
|
||||||
|
*/
|
||||||
u64 delalloc_bytes;
|
u64 delalloc_bytes;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* the size of the file stored in the metadata on disk. data=ordered
|
||||||
|
* means the in-memory i_size might be larger than the size on disk
|
||||||
|
* because not all the blocks are written yet.
|
||||||
|
*/
|
||||||
u64 disk_i_size;
|
u64 disk_i_size;
|
||||||
|
|
||||||
|
/* flags field from the on disk inode */
|
||||||
u32 flags;
|
u32 flags;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -68,6 +114,8 @@ struct btrfs_inode {
|
||||||
* number for new files that are created
|
* number for new files that are created
|
||||||
*/
|
*/
|
||||||
u64 index_cnt;
|
u64 index_cnt;
|
||||||
|
|
||||||
|
struct inode vfs_inode;
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
|
static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
|
||||||
|
|
|
@ -1,3 +1,21 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2008 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License v2 as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 021110-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
#ifndef __BTRFS_CRC32C__
|
#ifndef __BTRFS_CRC32C__
|
||||||
#define __BTRFS_CRC32C__
|
#define __BTRFS_CRC32C__
|
||||||
#include <asm/byteorder.h>
|
#include <asm/byteorder.h>
|
||||||
|
|
127
fs/btrfs/ctree.c
127
fs/btrfs/ctree.c
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
* Copyright (C) 2007,2008 Oracle. All rights reserved.
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or
|
* This program is free software; you can redistribute it and/or
|
||||||
* modify it under the terms of the GNU General Public
|
* modify it under the terms of the GNU General Public
|
||||||
|
@ -54,12 +54,19 @@ struct btrfs_path *btrfs_alloc_path(void)
|
||||||
return path;
|
return path;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* this also releases the path */
|
||||||
void btrfs_free_path(struct btrfs_path *p)
|
void btrfs_free_path(struct btrfs_path *p)
|
||||||
{
|
{
|
||||||
btrfs_release_path(NULL, p);
|
btrfs_release_path(NULL, p);
|
||||||
kmem_cache_free(btrfs_path_cachep, p);
|
kmem_cache_free(btrfs_path_cachep, p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* path release drops references on the extent buffers in the path
|
||||||
|
* and it drops any locks held by this path
|
||||||
|
*
|
||||||
|
* It is safe to call this on paths that no locks or extent buffers held.
|
||||||
|
*/
|
||||||
void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
|
void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
@ -77,6 +84,16 @@ void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* safely gets a reference on the root node of a tree. A lock
|
||||||
|
* is not taken, so a concurrent writer may put a different node
|
||||||
|
* at the root of the tree. See btrfs_lock_root_node for the
|
||||||
|
* looping required.
|
||||||
|
*
|
||||||
|
* The extent buffer returned by this has a reference taken, so
|
||||||
|
* it won't disappear. It may stop being the root of the tree
|
||||||
|
* at any time because there are no locks held.
|
||||||
|
*/
|
||||||
struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
|
struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
|
||||||
{
|
{
|
||||||
struct extent_buffer *eb;
|
struct extent_buffer *eb;
|
||||||
|
@ -87,6 +104,10 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
|
||||||
return eb;
|
return eb;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* loop around taking references on and locking the root node of the
|
||||||
|
* tree until you end up with a lock on the root. A locked buffer
|
||||||
|
* is returned, with a reference held.
|
||||||
|
*/
|
||||||
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
|
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
|
||||||
{
|
{
|
||||||
struct extent_buffer *eb;
|
struct extent_buffer *eb;
|
||||||
|
@ -108,6 +129,10 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
|
||||||
return eb;
|
return eb;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* cowonly root (everything not a reference counted cow subvolume), just get
|
||||||
|
* put onto a simple dirty list. transaction.c walks this to make sure they
|
||||||
|
* get properly updated on disk.
|
||||||
|
*/
|
||||||
static void add_root_to_dirty_list(struct btrfs_root *root)
|
static void add_root_to_dirty_list(struct btrfs_root *root)
|
||||||
{
|
{
|
||||||
if (root->track_dirty && list_empty(&root->dirty_list)) {
|
if (root->track_dirty && list_empty(&root->dirty_list)) {
|
||||||
|
@ -116,6 +141,11 @@ static void add_root_to_dirty_list(struct btrfs_root *root)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* used by snapshot creation to make a copy of a root for a tree with
|
||||||
|
* a given objectid. The buffer with the new root node is returned in
|
||||||
|
* cow_ret, and this func returns zero on success or a negative error code.
|
||||||
|
*/
|
||||||
int btrfs_copy_root(struct btrfs_trans_handle *trans,
|
int btrfs_copy_root(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root,
|
struct btrfs_root *root,
|
||||||
struct extent_buffer *buf,
|
struct extent_buffer *buf,
|
||||||
|
@ -167,6 +197,22 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* does the dirty work in cow of a single block. The parent block
|
||||||
|
* (if supplied) is updated to point to the new cow copy. The new
|
||||||
|
* buffer is marked dirty and returned locked. If you modify the block
|
||||||
|
* it needs to be marked dirty again.
|
||||||
|
*
|
||||||
|
* search_start -- an allocation hint for the new block
|
||||||
|
*
|
||||||
|
* empty_size -- a hint that you plan on doing more cow. This is the size in bytes
|
||||||
|
* the allocator should try to find free next to the block it returns. This is
|
||||||
|
* just a hint and may be ignored by the allocator.
|
||||||
|
*
|
||||||
|
* prealloc_dest -- if you have already reserved a destination for the cow,
|
||||||
|
* this uses that block instead of allocating a new one. btrfs_alloc_reserved_extent
|
||||||
|
* is used to finish the allocation.
|
||||||
|
*/
|
||||||
int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
|
int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root,
|
struct btrfs_root *root,
|
||||||
struct extent_buffer *buf,
|
struct extent_buffer *buf,
|
||||||
|
@ -311,6 +357,11 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* cows a single block, see __btrfs_cow_block for the real work.
|
||||||
|
* This version of it has extra checks so that a block isn't cow'd more than
|
||||||
|
* once per transaction, as long as it hasn't been written yet
|
||||||
|
*/
|
||||||
int noinline btrfs_cow_block(struct btrfs_trans_handle *trans,
|
int noinline btrfs_cow_block(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root, struct extent_buffer *buf,
|
struct btrfs_root *root, struct extent_buffer *buf,
|
||||||
struct extent_buffer *parent, int parent_slot,
|
struct extent_buffer *parent, int parent_slot,
|
||||||
|
@ -347,6 +398,10 @@ int noinline btrfs_cow_block(struct btrfs_trans_handle *trans,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* helper function for defrag to decide if two blocks pointed to by a
|
||||||
|
* node are actually close by
|
||||||
|
*/
|
||||||
static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
|
static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
|
||||||
{
|
{
|
||||||
if (blocknr < other && other - (blocknr + blocksize) < 32768)
|
if (blocknr < other && other - (blocknr + blocksize) < 32768)
|
||||||
|
@ -381,6 +436,11 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* this is used by the defrag code to go through all the
|
||||||
|
* leaves pointed to by a node and reallocate them so that
|
||||||
|
* disk order is close to key order
|
||||||
|
*/
|
||||||
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
|
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root, struct extent_buffer *parent,
|
struct btrfs_root *root, struct extent_buffer *parent,
|
||||||
int start_slot, int cache_only, u64 *last_ret,
|
int start_slot, int cache_only, u64 *last_ret,
|
||||||
|
@ -521,6 +581,10 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root,
|
||||||
return btrfs_item_offset_nr(leaf, nr - 1);
|
return btrfs_item_offset_nr(leaf, nr - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* extra debugging checks to make sure all the items in a key are
|
||||||
|
* well formed and in the proper order
|
||||||
|
*/
|
||||||
static int check_node(struct btrfs_root *root, struct btrfs_path *path,
|
static int check_node(struct btrfs_root *root, struct btrfs_path *path,
|
||||||
int level)
|
int level)
|
||||||
{
|
{
|
||||||
|
@ -561,6 +625,10 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* extra checking to make sure all the items in a leaf are
|
||||||
|
* well formed and in the proper order
|
||||||
|
*/
|
||||||
static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
|
static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
|
||||||
int level)
|
int level)
|
||||||
{
|
{
|
||||||
|
@ -782,6 +850,10 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* given a node and slot number, this reads the blocks it points to. The
|
||||||
|
* extent buffer is returned with a reference taken (but unlocked).
|
||||||
|
* NULL is returned on error.
|
||||||
|
*/
|
||||||
static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
|
static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
|
||||||
struct extent_buffer *parent, int slot)
|
struct extent_buffer *parent, int slot)
|
||||||
{
|
{
|
||||||
|
@ -798,6 +870,11 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
|
||||||
btrfs_node_ptr_generation(parent, slot));
|
btrfs_node_ptr_generation(parent, slot));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* node level balancing, used to make sure nodes are in proper order for
|
||||||
|
* item deletion. We balance from the top down, so we have to make sure
|
||||||
|
* that a deletion won't leave an node completely empty later on.
|
||||||
|
*/
|
||||||
static noinline int balance_level(struct btrfs_trans_handle *trans,
|
static noinline int balance_level(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root,
|
struct btrfs_root *root,
|
||||||
struct btrfs_path *path, int level)
|
struct btrfs_path *path, int level)
|
||||||
|
@ -1024,7 +1101,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* returns zero if the push worked, non-zero otherwise */
|
/* Node balancing for insertion. Here we only split or push nodes around
|
||||||
|
* when they are completely full. This is also done top down, so we
|
||||||
|
* have to be pessimistic.
|
||||||
|
*/
|
||||||
static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
|
static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root,
|
struct btrfs_root *root,
|
||||||
struct btrfs_path *path, int level)
|
struct btrfs_path *path, int level)
|
||||||
|
@ -1150,7 +1230,8 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* readahead one full node of leaves
|
* readahead one full node of leaves, finding things that are close
|
||||||
|
* to the block in 'slot', and triggering ra on them.
|
||||||
*/
|
*/
|
||||||
static noinline void reada_for_search(struct btrfs_root *root,
|
static noinline void reada_for_search(struct btrfs_root *root,
|
||||||
struct btrfs_path *path,
|
struct btrfs_path *path,
|
||||||
|
@ -1226,6 +1307,19 @@ static noinline void reada_for_search(struct btrfs_root *root,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* when we walk down the tree, it is usually safe to unlock the higher layers in
|
||||||
|
* the tree. The exceptions are when our path goes through slot 0, because operations
|
||||||
|
* on the tree might require changing key pointers higher up in the tree.
|
||||||
|
*
|
||||||
|
* callers might also have set path->keep_locks, which tells this code to
|
||||||
|
* keep the lock if the path points to the last slot in the block. This is
|
||||||
|
* part of walking through the tree, and selecting the next slot in the higher
|
||||||
|
* block.
|
||||||
|
*
|
||||||
|
* lowest_unlock sets the lowest level in the tree we're allowed to unlock.
|
||||||
|
* so if lowest_unlock is 1, level 0 won't be unlocked
|
||||||
|
*/
|
||||||
static noinline void unlock_up(struct btrfs_path *path, int level,
|
static noinline void unlock_up(struct btrfs_path *path, int level,
|
||||||
int lowest_unlock)
|
int lowest_unlock)
|
||||||
{
|
{
|
||||||
|
@ -2705,6 +2799,12 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* make the item pointed to by the path smaller. new_size indicates
|
||||||
|
* how small to make it, and from_end tells us if we just chop bytes
|
||||||
|
* off the end of the item or if we shift the item to chop bytes off
|
||||||
|
* the front.
|
||||||
|
*/
|
||||||
int btrfs_truncate_item(struct btrfs_trans_handle *trans,
|
int btrfs_truncate_item(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root,
|
struct btrfs_root *root,
|
||||||
struct btrfs_path *path,
|
struct btrfs_path *path,
|
||||||
|
@ -2818,6 +2918,9 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* make the item pointed to by the path bigger, data_size is the new size.
|
||||||
|
*/
|
||||||
int btrfs_extend_item(struct btrfs_trans_handle *trans,
|
int btrfs_extend_item(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root, struct btrfs_path *path,
|
struct btrfs_root *root, struct btrfs_path *path,
|
||||||
u32 data_size)
|
u32 data_size)
|
||||||
|
@ -2897,7 +3000,7 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Given a key and some data, insert an item into the tree.
|
* Given a key and some data, insert items into the tree.
|
||||||
* This does all the path init required, making room in the tree if needed.
|
* This does all the path init required, making room in the tree if needed.
|
||||||
*/
|
*/
|
||||||
int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
|
int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
|
||||||
|
@ -3046,9 +3149,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
|
||||||
/*
|
/*
|
||||||
* delete the pointer from a given node.
|
* delete the pointer from a given node.
|
||||||
*
|
*
|
||||||
* If the delete empties a node, the node is removed from the tree,
|
* the tree should have been previously balanced so the deletion does not
|
||||||
* continuing all the way the root if required. The root is converted into
|
* empty a node.
|
||||||
* a leaf if all the nodes are emptied.
|
|
||||||
*/
|
*/
|
||||||
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||||
struct btrfs_path *path, int level, int slot)
|
struct btrfs_path *path, int level, int slot)
|
||||||
|
@ -3233,6 +3335,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||||
* search the tree again to find a leaf with lesser keys
|
* search the tree again to find a leaf with lesser keys
|
||||||
* returns 0 if it found something or 1 if there are no lesser leaves.
|
* returns 0 if it found something or 1 if there are no lesser leaves.
|
||||||
* returns < 0 on io errors.
|
* returns < 0 on io errors.
|
||||||
|
*
|
||||||
|
* This may release the path, and so you may lose any locks held at the
|
||||||
|
* time you call it.
|
||||||
*/
|
*/
|
||||||
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
|
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
|
||||||
{
|
{
|
||||||
|
@ -3265,9 +3370,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
|
||||||
/*
|
/*
|
||||||
* A helper function to walk down the tree starting at min_key, and looking
|
* A helper function to walk down the tree starting at min_key, and looking
|
||||||
* for nodes or leaves that are either in cache or have a minimum
|
* for nodes or leaves that are either in cache or have a minimum
|
||||||
* transaction id. This is used by the btree defrag code, but could
|
* transaction id. This is used by the btree defrag code, and tree logging
|
||||||
* also be used to search for blocks that have changed since a given
|
|
||||||
* transaction id.
|
|
||||||
*
|
*
|
||||||
* This does not cow, but it does stuff the starting key it finds back
|
* This does not cow, but it does stuff the starting key it finds back
|
||||||
* into min_key, so you can call btrfs_search_slot with cow=1 on the
|
* into min_key, so you can call btrfs_search_slot with cow=1 on the
|
||||||
|
@ -3279,6 +3382,10 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
|
||||||
* This honors path->lowest_level to prevent descent past a given level
|
* This honors path->lowest_level to prevent descent past a given level
|
||||||
* of the tree.
|
* of the tree.
|
||||||
*
|
*
|
||||||
|
* min_trans indicates the oldest transaction that you are interested
|
||||||
|
* in walking through. Any nodes or leaves older than min_trans are
|
||||||
|
* skipped over (without reading them).
|
||||||
|
*
|
||||||
* returns zero if something useful was found, < 0 on error and 1 if there
|
* returns zero if something useful was found, < 0 on error and 1 if there
|
||||||
* was nothing in the tree that matched the search criteria.
|
* was nothing in the tree that matched the search criteria.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -27,7 +27,6 @@
|
||||||
#include <linux/backing-dev.h>
|
#include <linux/backing-dev.h>
|
||||||
#include <linux/wait.h>
|
#include <linux/wait.h>
|
||||||
#include <asm/kmap_types.h>
|
#include <asm/kmap_types.h>
|
||||||
#include "bit-radix.h"
|
|
||||||
#include "extent_io.h"
|
#include "extent_io.h"
|
||||||
#include "extent_map.h"
|
#include "extent_map.h"
|
||||||
#include "async-thread.h"
|
#include "async-thread.h"
|
||||||
|
|
|
@ -21,6 +21,14 @@
|
||||||
#include "hash.h"
|
#include "hash.h"
|
||||||
#include "transaction.h"
|
#include "transaction.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* insert a name into a directory, doing overflow properly if there is a hash
|
||||||
|
* collision. data_size indicates how big the item inserted should be. On
|
||||||
|
* success a struct btrfs_dir_item pointer is returned, otherwise it is
|
||||||
|
* an ERR_PTR.
|
||||||
|
*
|
||||||
|
* The name is not copied into the dir item, you have to do that yourself.
|
||||||
|
*/
|
||||||
static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
|
static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
|
||||||
*trans,
|
*trans,
|
||||||
struct btrfs_root *root,
|
struct btrfs_root *root,
|
||||||
|
@ -55,6 +63,10 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
|
||||||
return (struct btrfs_dir_item *)ptr;
|
return (struct btrfs_dir_item *)ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* xattrs work a lot like directories, this inserts an xattr item
|
||||||
|
* into the tree
|
||||||
|
*/
|
||||||
int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
|
int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root, const char *name,
|
struct btrfs_root *root, const char *name,
|
||||||
u16 name_len, const void *data, u16 data_len,
|
u16 name_len, const void *data, u16 data_len,
|
||||||
|
@ -109,6 +121,13 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* insert a directory item in the tree, doing all the magic for
|
||||||
|
* both indexes. 'dir' indicates which objectid to insert it into,
|
||||||
|
* 'location' is the key to stuff into the directory item, 'type' is the
|
||||||
|
* type of the inode we're pointing to, and 'index' is the sequence number
|
||||||
|
* to use for the second index (if one is created).
|
||||||
|
*/
|
||||||
int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
|
int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
|
||||||
*root, const char *name, int name_len, u64 dir,
|
*root, const char *name, int name_len, u64 dir,
|
||||||
struct btrfs_key *location, u8 type, u64 index)
|
struct btrfs_key *location, u8 type, u64 index)
|
||||||
|
@ -184,6 +203,11 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* lookup a directory item based on name. 'dir' is the objectid
|
||||||
|
* we're searching in, and 'mod' tells us if you plan on deleting the
|
||||||
|
* item (use mod < 0) or changing the options (use mod > 0)
|
||||||
|
*/
|
||||||
struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
|
struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root,
|
struct btrfs_root *root,
|
||||||
struct btrfs_path *path, u64 dir,
|
struct btrfs_path *path, u64 dir,
|
||||||
|
@ -222,6 +246,14 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
|
||||||
return btrfs_match_dir_item_name(root, path, name, name_len);
|
return btrfs_match_dir_item_name(root, path, name, name_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* lookup a directory item based on index. 'dir' is the objectid
|
||||||
|
* we're searching in, and 'mod' tells us if you plan on deleting the
|
||||||
|
* item (use mod < 0) or changing the options (use mod > 0)
|
||||||
|
*
|
||||||
|
* The name is used to make sure the index really points to the name you were
|
||||||
|
* looking for.
|
||||||
|
*/
|
||||||
struct btrfs_dir_item *
|
struct btrfs_dir_item *
|
||||||
btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
|
btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root,
|
struct btrfs_root *root,
|
||||||
|
@ -282,6 +314,11 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
|
||||||
return btrfs_match_dir_item_name(root, path, name, name_len);
|
return btrfs_match_dir_item_name(root, path, name, name_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* helper function to look at the directory item pointed to by 'path'
|
||||||
|
* this walks through all the entries in a dir item and finds one
|
||||||
|
* for a specific name.
|
||||||
|
*/
|
||||||
struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
|
struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
|
||||||
struct btrfs_path *path,
|
struct btrfs_path *path,
|
||||||
const char *name, int name_len)
|
const char *name, int name_len)
|
||||||
|
@ -313,6 +350,10 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* given a pointer into a directory item, delete it. This
|
||||||
|
* handles items that have more than one entry in them.
|
||||||
|
*/
|
||||||
int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
|
int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root,
|
struct btrfs_root *root,
|
||||||
struct btrfs_path *path,
|
struct btrfs_path *path,
|
||||||
|
|
|
@ -55,6 +55,11 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
|
||||||
static struct extent_io_ops btree_extent_io_ops;
|
static struct extent_io_ops btree_extent_io_ops;
|
||||||
static void end_workqueue_fn(struct btrfs_work *work);
|
static void end_workqueue_fn(struct btrfs_work *work);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* end_io_wq structs are used to do processing in task context when an IO is
|
||||||
|
* complete. This is used during reads to verify checksums, and it is used
|
||||||
|
* by writes to insert metadata for new file extents after IO is complete.
|
||||||
|
*/
|
||||||
struct end_io_wq {
|
struct end_io_wq {
|
||||||
struct bio *bio;
|
struct bio *bio;
|
||||||
bio_end_io_t *end_io;
|
bio_end_io_t *end_io;
|
||||||
|
@ -66,6 +71,11 @@ struct end_io_wq {
|
||||||
struct btrfs_work work;
|
struct btrfs_work work;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* async submit bios are used to offload expensive checksumming
|
||||||
|
* onto the worker threads. They checksum file and metadata bios
|
||||||
|
* just before they are sent down the IO stack.
|
||||||
|
*/
|
||||||
struct async_submit_bio {
|
struct async_submit_bio {
|
||||||
struct inode *inode;
|
struct inode *inode;
|
||||||
struct bio *bio;
|
struct bio *bio;
|
||||||
|
@ -76,6 +86,10 @@ struct async_submit_bio {
|
||||||
struct btrfs_work work;
|
struct btrfs_work work;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* extents on the btree inode are pretty simple, there's one extent
|
||||||
|
* that covers the entire device
|
||||||
|
*/
|
||||||
struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
|
struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
|
||||||
size_t page_offset, u64 start, u64 len,
|
size_t page_offset, u64 start, u64 len,
|
||||||
int create)
|
int create)
|
||||||
|
@ -151,6 +165,10 @@ void btrfs_csum_final(u32 crc, char *result)
|
||||||
*(__le32 *)result = ~cpu_to_le32(crc);
|
*(__le32 *)result = ~cpu_to_le32(crc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* compute the csum for a btree block, and either verify it or write it
|
||||||
|
* into the csum field of the block.
|
||||||
|
*/
|
||||||
static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
|
static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
|
||||||
int verify)
|
int verify)
|
||||||
{
|
{
|
||||||
|
@ -204,6 +222,12 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* we can't consider a given block up to date unless the transid of the
|
||||||
|
* block matches the transid in the parent node's pointer. This is how we
|
||||||
|
* detect blocks that either didn't get written at all or got written
|
||||||
|
* in the wrong place.
|
||||||
|
*/
|
||||||
static int verify_parent_transid(struct extent_io_tree *io_tree,
|
static int verify_parent_transid(struct extent_io_tree *io_tree,
|
||||||
struct extent_buffer *eb, u64 parent_transid)
|
struct extent_buffer *eb, u64 parent_transid)
|
||||||
{
|
{
|
||||||
|
@ -228,9 +252,12 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
|
||||||
unlock_extent(io_tree, eb->start, eb->start + eb->len - 1,
|
unlock_extent(io_tree, eb->start, eb->start + eb->len - 1,
|
||||||
GFP_NOFS);
|
GFP_NOFS);
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* helper to read a given tree block, doing retries as required when
|
||||||
|
* the checksums don't match and we have alternate mirrors to try.
|
||||||
|
*/
|
||||||
static int btree_read_extent_buffer_pages(struct btrfs_root *root,
|
static int btree_read_extent_buffer_pages(struct btrfs_root *root,
|
||||||
struct extent_buffer *eb,
|
struct extent_buffer *eb,
|
||||||
u64 start, u64 parent_transid)
|
u64 start, u64 parent_transid)
|
||||||
|
@ -260,6 +287,10 @@ printk("read extent buffer pages failed with ret %d mirror no %d\n", ret, mirror
|
||||||
return -EIO;
|
return -EIO;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* checksum a dirty tree block before IO. This has extra checks to make
|
||||||
|
* sure we only fill in the checksum field in the first page of a multi-page block
|
||||||
|
*/
|
||||||
int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
|
int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
|
||||||
{
|
{
|
||||||
struct extent_io_tree *tree;
|
struct extent_io_tree *tree;
|
||||||
|
|
|
@ -914,6 +914,10 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(wait_on_extent_writeback);
|
EXPORT_SYMBOL(wait_on_extent_writeback);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* either insert or lock state struct between start and end use mask to tell
|
||||||
|
* us if waiting is desired.
|
||||||
|
*/
|
||||||
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
|
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
|
@ -982,6 +986,13 @@ int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(set_range_writeback);
|
EXPORT_SYMBOL(set_range_writeback);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* find the first offset in the io tree with 'bits' set. zero is
|
||||||
|
* returned if we find something, and *start_ret and *end_ret are
|
||||||
|
* set to reflect the state struct that was found.
|
||||||
|
*
|
||||||
|
* If nothing was found, 1 is returned, < 0 on error
|
||||||
|
*/
|
||||||
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
|
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
|
||||||
u64 *start_ret, u64 *end_ret, int bits)
|
u64 *start_ret, u64 *end_ret, int bits)
|
||||||
{
|
{
|
||||||
|
@ -1017,6 +1028,10 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(find_first_extent_bit);
|
EXPORT_SYMBOL(find_first_extent_bit);
|
||||||
|
|
||||||
|
/* find the first state struct with 'bits' set after 'start', and
|
||||||
|
* return it. tree->lock must be held. NULL will returned if
|
||||||
|
* nothing was found after 'start'
|
||||||
|
*/
|
||||||
struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
|
struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
|
||||||
u64 start, int bits)
|
u64 start, int bits)
|
||||||
{
|
{
|
||||||
|
@ -1046,8 +1061,14 @@ struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(find_first_extent_bit_state);
|
EXPORT_SYMBOL(find_first_extent_bit_state);
|
||||||
|
|
||||||
u64 find_lock_delalloc_range(struct extent_io_tree *tree,
|
/*
|
||||||
u64 *start, u64 *end, u64 max_bytes)
|
* find a contiguous range of bytes in the file marked as delalloc, not
|
||||||
|
* more than 'max_bytes'. start and end are used to return the range,
|
||||||
|
*
|
||||||
|
* 1 is returned if we find something, 0 if nothing was in the tree
|
||||||
|
*/
|
||||||
|
static noinline u64 find_lock_delalloc_range(struct extent_io_tree *tree,
|
||||||
|
u64 *start, u64 *end, u64 max_bytes)
|
||||||
{
|
{
|
||||||
struct rb_node *node;
|
struct rb_node *node;
|
||||||
struct extent_state *state;
|
struct extent_state *state;
|
||||||
|
@ -1130,6 +1151,11 @@ u64 find_lock_delalloc_range(struct extent_io_tree *tree,
|
||||||
return found;
|
return found;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* count the number of bytes in the tree that have a given bit(s)
|
||||||
|
* set. This can be fairly slow, except for EXTENT_DIRTY which is
|
||||||
|
* cached. The total number found is returned.
|
||||||
|
*/
|
||||||
u64 count_range_bits(struct extent_io_tree *tree,
|
u64 count_range_bits(struct extent_io_tree *tree,
|
||||||
u64 *start, u64 search_end, u64 max_bytes,
|
u64 *start, u64 search_end, u64 max_bytes,
|
||||||
unsigned long bits)
|
unsigned long bits)
|
||||||
|
@ -1245,6 +1271,10 @@ int unlock_range(struct extent_io_tree *tree, u64 start, u64 end)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(unlock_range);
|
EXPORT_SYMBOL(unlock_range);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* set the private field for a given byte offset in the tree. If there isn't
|
||||||
|
* an extent_state there already, this does nothing.
|
||||||
|
*/
|
||||||
int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
|
int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
|
||||||
{
|
{
|
||||||
struct rb_node *node;
|
struct rb_node *node;
|
||||||
|
|
|
@ -114,6 +114,10 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* search through the tree for an extent_map with a given offset. If
|
||||||
|
* it can't be found, try to find some neighboring extents
|
||||||
|
*/
|
||||||
static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
|
static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
|
||||||
struct rb_node **prev_ret,
|
struct rb_node **prev_ret,
|
||||||
struct rb_node **next_ret)
|
struct rb_node **next_ret)
|
||||||
|
@ -160,6 +164,10 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* look for an offset in the tree, and if it can't be found, return
|
||||||
|
* the first offset we can find smaller than 'offset'.
|
||||||
|
*/
|
||||||
static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
|
static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
|
||||||
{
|
{
|
||||||
struct rb_node *prev;
|
struct rb_node *prev;
|
||||||
|
@ -170,6 +178,7 @@ static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* check to see if two extent_map structs are adjacent and safe to merge */
|
||||||
static int mergable_maps(struct extent_map *prev, struct extent_map *next)
|
static int mergable_maps(struct extent_map *prev, struct extent_map *next)
|
||||||
{
|
{
|
||||||
if (test_bit(EXTENT_FLAG_PINNED, &prev->flags))
|
if (test_bit(EXTENT_FLAG_PINNED, &prev->flags))
|
||||||
|
@ -250,6 +259,7 @@ int add_extent_mapping(struct extent_map_tree *tree,
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(add_extent_mapping);
|
EXPORT_SYMBOL(add_extent_mapping);
|
||||||
|
|
||||||
|
/* simple helper to do math around the end of an extent, handling wrap */
|
||||||
static u64 range_end(u64 start, u64 len)
|
static u64 range_end(u64 start, u64 len)
|
||||||
{
|
{
|
||||||
if (start + len < start)
|
if (start + len < start)
|
||||||
|
|
|
@ -41,6 +41,9 @@
|
||||||
#include "compat.h"
|
#include "compat.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* simple helper to fault in pages and copy. This should go away
|
||||||
|
* and be replaced with calls into generic code.
|
||||||
|
*/
|
||||||
static int noinline btrfs_copy_from_user(loff_t pos, int num_pages,
|
static int noinline btrfs_copy_from_user(loff_t pos, int num_pages,
|
||||||
int write_bytes,
|
int write_bytes,
|
||||||
struct page **prepared_pages,
|
struct page **prepared_pages,
|
||||||
|
@ -72,12 +75,19 @@ static int noinline btrfs_copy_from_user(loff_t pos, int num_pages,
|
||||||
return page_fault ? -EFAULT : 0;
|
return page_fault ? -EFAULT : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* unlocks pages after btrfs_file_write is done with them
|
||||||
|
*/
|
||||||
static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages)
|
static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages)
|
||||||
{
|
{
|
||||||
size_t i;
|
size_t i;
|
||||||
for (i = 0; i < num_pages; i++) {
|
for (i = 0; i < num_pages; i++) {
|
||||||
if (!pages[i])
|
if (!pages[i])
|
||||||
break;
|
break;
|
||||||
|
/* page checked is some magic around finding pages that
|
||||||
|
* have been modified without going through btrfs_set_page_dirty
|
||||||
|
* clear it here
|
||||||
|
*/
|
||||||
ClearPageChecked(pages[i]);
|
ClearPageChecked(pages[i]);
|
||||||
unlock_page(pages[i]);
|
unlock_page(pages[i]);
|
||||||
mark_page_accessed(pages[i]);
|
mark_page_accessed(pages[i]);
|
||||||
|
@ -85,6 +95,10 @@ static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* this does all the hard work for inserting an inline extent into
|
||||||
|
* the btree. Any existing inline extent is extended as required to make room,
|
||||||
|
* otherwise things are inserted as required into the btree
|
||||||
|
*/
|
||||||
static int noinline insert_inline_extent(struct btrfs_trans_handle *trans,
|
static int noinline insert_inline_extent(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root, struct inode *inode,
|
struct btrfs_root *root, struct inode *inode,
|
||||||
u64 offset, size_t size,
|
u64 offset, size_t size,
|
||||||
|
@ -228,6 +242,14 @@ static int noinline insert_inline_extent(struct btrfs_trans_handle *trans,
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* after copy_from_user, pages need to be dirtied and we need to make
|
||||||
|
* sure holes are created between the current EOF and the start of
|
||||||
|
* any next extents (if required).
|
||||||
|
*
|
||||||
|
* this also makes the decision about creating an inline extent vs
|
||||||
|
* doing real data extents, marking pages dirty and delalloc as required.
|
||||||
|
*/
|
||||||
static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
|
static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root,
|
struct btrfs_root *root,
|
||||||
struct file *file,
|
struct file *file,
|
||||||
|
@ -362,6 +384,10 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* this drops all the extents in the cache that intersect the range
|
||||||
|
* [start, end]. Existing extents are split as required.
|
||||||
|
*/
|
||||||
int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
|
int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
|
||||||
int skip_pinned)
|
int skip_pinned)
|
||||||
{
|
{
|
||||||
|
@ -536,6 +562,9 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode)
|
||||||
* If an extent intersects the range but is not entirely inside the range
|
* If an extent intersects the range but is not entirely inside the range
|
||||||
* it is either truncated or split. Anything entirely inside the range
|
* it is either truncated or split. Anything entirely inside the range
|
||||||
* is deleted from the tree.
|
* is deleted from the tree.
|
||||||
|
*
|
||||||
|
* inline_limit is used to tell this code which offsets in the file to keep
|
||||||
|
* if they contain inline extents.
|
||||||
*/
|
*/
|
||||||
int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
|
int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root, struct inode *inode,
|
struct btrfs_root *root, struct inode *inode,
|
||||||
|
@ -796,7 +825,9 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* this gets pages into the page cache and locks them down
|
* this gets pages into the page cache and locks them down, it also properly
|
||||||
|
* waits for data=ordered extents to finish before allowing the pages to be
|
||||||
|
* modified.
|
||||||
*/
|
*/
|
||||||
static int noinline prepare_pages(struct btrfs_root *root, struct file *file,
|
static int noinline prepare_pages(struct btrfs_root *root, struct file *file,
|
||||||
struct page **pages, size_t num_pages,
|
struct page **pages, size_t num_pages,
|
||||||
|
@ -1034,6 +1065,17 @@ int btrfs_release_file(struct inode * inode, struct file * filp)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* fsync call for both files and directories. This logs the inode into
|
||||||
|
* the tree log instead of forcing full commits whenever possible.
|
||||||
|
*
|
||||||
|
* It needs to call filemap_fdatawait so that all ordered extent updates are
|
||||||
|
* in the metadata btree are up to date for copying to the log.
|
||||||
|
*
|
||||||
|
* It drops the inode mutex before doing the tree log commit. This is an
|
||||||
|
* important optimization for directories because holding the mutex prevents
|
||||||
|
* new operations on the dir while we write to disk.
|
||||||
|
*/
|
||||||
int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
|
int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
|
||||||
{
|
{
|
||||||
struct inode *inode = dentry->d_inode;
|
struct inode *inode = dentry->d_inode;
|
||||||
|
|
189
fs/btrfs/inode.c
189
fs/btrfs/inode.c
|
@ -83,6 +83,10 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
|
||||||
|
|
||||||
static void btrfs_truncate(struct inode *inode);
|
static void btrfs_truncate(struct inode *inode);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* a very lame attempt at stopping writes when the FS is 85% full. There
|
||||||
|
* are countless ways this is incorrect, but it is better than nothing.
|
||||||
|
*/
|
||||||
int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
|
int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
|
||||||
int for_del)
|
int for_del)
|
||||||
{
|
{
|
||||||
|
@ -108,6 +112,12 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* when extent_io.c finds a delayed allocation range in the file,
|
||||||
|
* the call backs end up in this code. The basic idea is to
|
||||||
|
* allocate extents on disk for the range, and create ordered data structs
|
||||||
|
* in ram to track those extents.
|
||||||
|
*/
|
||||||
static int cow_file_range(struct inode *inode, u64 start, u64 end)
|
static int cow_file_range(struct inode *inode, u64 start, u64 end)
|
||||||
{
|
{
|
||||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||||
|
@ -185,6 +195,13 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* when nowcow writeback call back. This checks for snapshots or COW copies
|
||||||
|
* of the extents that exist in the file, and COWs the file as required.
|
||||||
|
*
|
||||||
|
* If no cow copies or snapshots exist, we write directly to the existing
|
||||||
|
* blocks on disk
|
||||||
|
*/
|
||||||
static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end)
|
static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end)
|
||||||
{
|
{
|
||||||
u64 extent_start;
|
u64 extent_start;
|
||||||
|
@ -291,6 +308,9 @@ static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end)
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* extent_io.c call back to do delayed allocation processing
|
||||||
|
*/
|
||||||
static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
|
static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
|
||||||
{
|
{
|
||||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||||
|
@ -305,6 +325,11 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* extent_io.c set_bit_hook, used to track delayed allocation
|
||||||
|
* bytes in this file, and to maintain the list of inodes that
|
||||||
|
* have pending delalloc work to be done.
|
||||||
|
*/
|
||||||
int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
|
int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
|
||||||
unsigned long old, unsigned long bits)
|
unsigned long old, unsigned long bits)
|
||||||
{
|
{
|
||||||
|
@ -323,6 +348,9 @@ int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* extent_io.c clear_bit_hook, see set_bit_hook for why
|
||||||
|
*/
|
||||||
int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end,
|
int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end,
|
||||||
unsigned long old, unsigned long bits)
|
unsigned long old, unsigned long bits)
|
||||||
{
|
{
|
||||||
|
@ -349,6 +377,10 @@ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* extent_io.c merge_bio_hook, this must check the chunk tree to make sure
|
||||||
|
* we don't create bios that span stripes or chunks
|
||||||
|
*/
|
||||||
int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
|
int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
|
||||||
size_t size, struct bio *bio)
|
size_t size, struct bio *bio)
|
||||||
{
|
{
|
||||||
|
@ -371,6 +403,14 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* in order to insert checksums into the metadata in large chunks,
|
||||||
|
* we wait until bio submission time. All the pages in the bio are
|
||||||
|
* checksummed and sums are attached onto the ordered extent record.
|
||||||
|
*
|
||||||
|
* At IO completion time the cums attached on the ordered extent record
|
||||||
|
* are inserted into the btree
|
||||||
|
*/
|
||||||
int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
|
int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
|
||||||
int mirror_num)
|
int mirror_num)
|
||||||
{
|
{
|
||||||
|
@ -383,6 +423,10 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
|
||||||
return btrfs_map_bio(root, rw, bio, mirror_num, 1);
|
return btrfs_map_bio(root, rw, bio, mirror_num, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* extent_io.c submission hook. This does the right thing for csum calculation on write,
|
||||||
|
* or reading the csums from the tree before a read
|
||||||
|
*/
|
||||||
int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
|
int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
|
||||||
int mirror_num)
|
int mirror_num)
|
||||||
{
|
{
|
||||||
|
@ -408,6 +452,10 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
|
||||||
return btrfs_map_bio(root, rw, bio, mirror_num, 0);
|
return btrfs_map_bio(root, rw, bio, mirror_num, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* given a list of ordered sums record them in the inode. This happens
|
||||||
|
* at IO completion time based on sums calculated at bio submission time.
|
||||||
|
*/
|
||||||
static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
|
static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
|
||||||
struct inode *inode, u64 file_offset,
|
struct inode *inode, u64 file_offset,
|
||||||
struct list_head *list)
|
struct list_head *list)
|
||||||
|
@ -430,12 +478,12 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end)
|
||||||
GFP_NOFS);
|
GFP_NOFS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* see btrfs_writepage_start_hook for details on why this is required */
|
||||||
struct btrfs_writepage_fixup {
|
struct btrfs_writepage_fixup {
|
||||||
struct page *page;
|
struct page *page;
|
||||||
struct btrfs_work work;
|
struct btrfs_work work;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* see btrfs_writepage_start_hook for details on why this is required */
|
|
||||||
void btrfs_writepage_fixup_worker(struct btrfs_work *work)
|
void btrfs_writepage_fixup_worker(struct btrfs_work *work)
|
||||||
{
|
{
|
||||||
struct btrfs_writepage_fixup *fixup;
|
struct btrfs_writepage_fixup *fixup;
|
||||||
|
@ -522,6 +570,10 @@ int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
|
||||||
return -EAGAIN;
|
return -EAGAIN;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* as ordered data IO finishes, this gets called so we can finish
|
||||||
|
* an ordered extent if the range of bytes in the file it covers are
|
||||||
|
* fully written.
|
||||||
|
*/
|
||||||
static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
|
static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
|
||||||
{
|
{
|
||||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||||
|
@ -631,6 +683,14 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
|
||||||
return btrfs_finish_ordered_io(page->mapping->host, start, end);
|
return btrfs_finish_ordered_io(page->mapping->host, start, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* When IO fails, either with EIO or csum verification fails, we
|
||||||
|
* try other mirrors that might have a good copy of the data. This
|
||||||
|
* io_failure_record is used to record state as we go through all the
|
||||||
|
* mirrors. If another mirror has good data, the page is set up to date
|
||||||
|
* and things continue. If a good mirror can't be found, the original
|
||||||
|
* bio end_io callback is called to indicate things have failed.
|
||||||
|
*/
|
||||||
struct io_failure_record {
|
struct io_failure_record {
|
||||||
struct page *page;
|
struct page *page;
|
||||||
u64 start;
|
u64 start;
|
||||||
|
@ -725,6 +785,10 @@ int btrfs_io_failed_hook(struct bio *failed_bio,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* each time an IO finishes, we do a fast check in the IO failure tree
|
||||||
|
* to see if we need to process or clean up an io_failure_record
|
||||||
|
*/
|
||||||
int btrfs_clean_io_failures(struct inode *inode, u64 start)
|
int btrfs_clean_io_failures(struct inode *inode, u64 start)
|
||||||
{
|
{
|
||||||
u64 private;
|
u64 private;
|
||||||
|
@ -753,6 +817,11 @@ int btrfs_clean_io_failures(struct inode *inode, u64 start)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* when reads are done, we need to check csums to verify the data is correct
|
||||||
|
* if there's a match, we allow the bio to finish. If not, we go through
|
||||||
|
* the io_failure_record routines to find good copies
|
||||||
|
*/
|
||||||
int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
|
int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
|
||||||
struct extent_state *state)
|
struct extent_state *state)
|
||||||
{
|
{
|
||||||
|
@ -990,6 +1059,9 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
|
||||||
btrfs_free_path(path);
|
btrfs_free_path(path);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* read an inode from the btree into the in-memory inode
|
||||||
|
*/
|
||||||
void btrfs_read_locked_inode(struct inode *inode)
|
void btrfs_read_locked_inode(struct inode *inode)
|
||||||
{
|
{
|
||||||
struct btrfs_path *path;
|
struct btrfs_path *path;
|
||||||
|
@ -1083,6 +1155,9 @@ void btrfs_read_locked_inode(struct inode *inode)
|
||||||
make_bad_inode(inode);
|
make_bad_inode(inode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* given a leaf and an inode, copy the inode fields into the leaf
|
||||||
|
*/
|
||||||
static void fill_inode_item(struct btrfs_trans_handle *trans,
|
static void fill_inode_item(struct btrfs_trans_handle *trans,
|
||||||
struct extent_buffer *leaf,
|
struct extent_buffer *leaf,
|
||||||
struct btrfs_inode_item *item,
|
struct btrfs_inode_item *item,
|
||||||
|
@ -1118,6 +1193,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
|
||||||
BTRFS_I(inode)->block_group->key.objectid);
|
BTRFS_I(inode)->block_group->key.objectid);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* copy everything in the in-memory inode into the btree.
|
||||||
|
*/
|
||||||
int noinline btrfs_update_inode(struct btrfs_trans_handle *trans,
|
int noinline btrfs_update_inode(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root,
|
struct btrfs_root *root,
|
||||||
struct inode *inode)
|
struct inode *inode)
|
||||||
|
@ -1151,6 +1229,11 @@ int noinline btrfs_update_inode(struct btrfs_trans_handle *trans,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* unlink helper that gets used here in inode.c and in the tree logging
|
||||||
|
* recovery code. It remove a link in a directory with a given name, and
|
||||||
|
* also drops the back refs in the inode to the directory
|
||||||
|
*/
|
||||||
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
|
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root,
|
struct btrfs_root *root,
|
||||||
struct inode *dir, struct inode *inode,
|
struct inode *dir, struct inode *inode,
|
||||||
|
@ -1309,7 +1392,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
|
||||||
/*
|
/*
|
||||||
* this can truncate away extent items, csum items and directory items.
|
* this can truncate away extent items, csum items and directory items.
|
||||||
* It starts at a high offset and removes keys until it can't find
|
* It starts at a high offset and removes keys until it can't find
|
||||||
* any higher than i_size.
|
* any higher than new_size
|
||||||
*
|
*
|
||||||
* csum items that cross the new i_size are truncated to the new size
|
* csum items that cross the new i_size are truncated to the new size
|
||||||
* as well.
|
* as well.
|
||||||
|
@ -2123,6 +2206,11 @@ void btrfs_dirty_inode(struct inode *inode)
|
||||||
btrfs_end_transaction(trans, root);
|
btrfs_end_transaction(trans, root);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* find the highest existing sequence number in a directory
|
||||||
|
* and then set the in-memory index_cnt variable to reflect
|
||||||
|
* free sequence numbers
|
||||||
|
*/
|
||||||
static int btrfs_set_inode_index_count(struct inode *inode)
|
static int btrfs_set_inode_index_count(struct inode *inode)
|
||||||
{
|
{
|
||||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||||
|
@ -2175,6 +2263,10 @@ static int btrfs_set_inode_index_count(struct inode *inode)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* helper to find a free sequence number in a given directory. This current
|
||||||
|
* code is very simple, later versions will do smarter things in the btree
|
||||||
|
*/
|
||||||
static int btrfs_set_inode_index(struct inode *dir, struct inode *inode,
|
static int btrfs_set_inode_index(struct inode *dir, struct inode *inode,
|
||||||
u64 *index)
|
u64 *index)
|
||||||
{
|
{
|
||||||
|
@ -2305,6 +2397,12 @@ static inline u8 btrfs_inode_type(struct inode *inode)
|
||||||
return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
|
return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* utility function to add 'inode' into 'parent_inode' with
|
||||||
|
* a give name and a given sequence number.
|
||||||
|
* if 'add_backref' is true, also insert a backref from the
|
||||||
|
* inode to the parent directory.
|
||||||
|
*/
|
||||||
int btrfs_add_link(struct btrfs_trans_handle *trans,
|
int btrfs_add_link(struct btrfs_trans_handle *trans,
|
||||||
struct inode *parent_inode, struct inode *inode,
|
struct inode *parent_inode, struct inode *inode,
|
||||||
const char *name, int name_len, int add_backref, u64 index)
|
const char *name, int name_len, int add_backref, u64 index)
|
||||||
|
@ -2611,6 +2709,10 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* helper for btfs_get_extent. Given an existing extent in the tree,
|
||||||
|
* and an extent that you want to insert, deal with overlap and insert
|
||||||
|
* the new extent into the tree.
|
||||||
|
*/
|
||||||
static int merge_extent_mapping(struct extent_map_tree *em_tree,
|
static int merge_extent_mapping(struct extent_map_tree *em_tree,
|
||||||
struct extent_map *existing,
|
struct extent_map *existing,
|
||||||
struct extent_map *em,
|
struct extent_map *em,
|
||||||
|
@ -2627,6 +2729,14 @@ static int merge_extent_mapping(struct extent_map_tree *em_tree,
|
||||||
return add_extent_mapping(em_tree, em);
|
return add_extent_mapping(em_tree, em);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* a bit scary, this does extent mapping from logical file offset to the disk.
|
||||||
|
* the ugly parts come from merging extents from the disk with the
|
||||||
|
* in-ram representation. This gets more complex because of the data=ordered code,
|
||||||
|
* where the in-ram extents might be locked pending data=ordered completion.
|
||||||
|
*
|
||||||
|
* This also copies inline extents directly into the page.
|
||||||
|
*/
|
||||||
struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
|
struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
|
||||||
size_t pg_offset, u64 start, u64 len,
|
size_t pg_offset, u64 start, u64 len,
|
||||||
int create)
|
int create)
|
||||||
|
@ -2869,76 +2979,11 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
|
||||||
return em;
|
return em;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0 /* waiting for O_DIRECT reads */
|
|
||||||
static int btrfs_get_block(struct inode *inode, sector_t iblock,
|
|
||||||
struct buffer_head *bh_result, int create)
|
|
||||||
{
|
|
||||||
struct extent_map *em;
|
|
||||||
u64 start = (u64)iblock << inode->i_blkbits;
|
|
||||||
struct btrfs_multi_bio *multi = NULL;
|
|
||||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
|
||||||
u64 len;
|
|
||||||
u64 logical;
|
|
||||||
u64 map_length;
|
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
em = btrfs_get_extent(inode, NULL, 0, start, bh_result->b_size, 0);
|
|
||||||
|
|
||||||
if (!em || IS_ERR(em))
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
if (em->start > start || em->start + em->len <= start) {
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (em->block_start == EXTENT_MAP_INLINE) {
|
|
||||||
ret = -EINVAL;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
len = em->start + em->len - start;
|
|
||||||
len = min_t(u64, len, INT_LIMIT(typeof(bh_result->b_size)));
|
|
||||||
|
|
||||||
if (em->block_start == EXTENT_MAP_HOLE ||
|
|
||||||
em->block_start == EXTENT_MAP_DELALLOC) {
|
|
||||||
bh_result->b_size = len;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
logical = start - em->start;
|
|
||||||
logical = em->block_start + logical;
|
|
||||||
|
|
||||||
map_length = len;
|
|
||||||
ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
|
|
||||||
logical, &map_length, &multi, 0);
|
|
||||||
BUG_ON(ret);
|
|
||||||
bh_result->b_blocknr = multi->stripes[0].physical >> inode->i_blkbits;
|
|
||||||
bh_result->b_size = min(map_length, len);
|
|
||||||
|
|
||||||
bh_result->b_bdev = multi->stripes[0].dev->bdev;
|
|
||||||
set_buffer_mapped(bh_result);
|
|
||||||
kfree(multi);
|
|
||||||
out:
|
|
||||||
free_extent_map(em);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
|
static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
|
||||||
const struct iovec *iov, loff_t offset,
|
const struct iovec *iov, loff_t offset,
|
||||||
unsigned long nr_segs)
|
unsigned long nr_segs)
|
||||||
{
|
{
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
#if 0
|
|
||||||
struct file *file = iocb->ki_filp;
|
|
||||||
struct inode *inode = file->f_mapping->host;
|
|
||||||
|
|
||||||
if (rw == WRITE)
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
|
|
||||||
offset, nr_segs, btrfs_get_block, NULL);
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock)
|
static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock)
|
||||||
|
@ -3202,6 +3247,9 @@ void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* create a new subvolume directory/inode (helper for the ioctl).
|
||||||
|
*/
|
||||||
int btrfs_create_subvol_root(struct btrfs_root *new_root,
|
int btrfs_create_subvol_root(struct btrfs_root *new_root,
|
||||||
struct btrfs_trans_handle *trans, u64 new_dirid,
|
struct btrfs_trans_handle *trans, u64 new_dirid,
|
||||||
struct btrfs_block_group_cache *block_group)
|
struct btrfs_block_group_cache *block_group)
|
||||||
|
@ -3223,6 +3271,9 @@ int btrfs_create_subvol_root(struct btrfs_root *new_root,
|
||||||
return btrfs_update_inode(trans, new_root, inode);
|
return btrfs_update_inode(trans, new_root, inode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* helper function for file defrag and space balancing. This
|
||||||
|
* forces readahead on a given range of bytes in an inode
|
||||||
|
*/
|
||||||
unsigned long btrfs_force_ra(struct address_space *mapping,
|
unsigned long btrfs_force_ra(struct address_space *mapping,
|
||||||
struct file_ra_state *ra, struct file *file,
|
struct file_ra_state *ra, struct file *file,
|
||||||
pgoff_t offset, pgoff_t last_index)
|
pgoff_t offset, pgoff_t last_index)
|
||||||
|
@ -3424,6 +3475,10 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* some fairly slow code that needs optimization. This walks the list
|
||||||
|
* of all the inodes with pending delalloc and forces them to disk.
|
||||||
|
*/
|
||||||
int btrfs_start_delalloc_inodes(struct btrfs_root *root)
|
int btrfs_start_delalloc_inodes(struct btrfs_root *root)
|
||||||
{
|
{
|
||||||
struct list_head *head = &root->fs_info->delalloc_inodes;
|
struct list_head *head = &root->fs_info->delalloc_inodes;
|
||||||
|
|
|
@ -25,6 +25,15 @@
|
||||||
#include "extent_io.h"
|
#include "extent_io.h"
|
||||||
#include "locking.h"
|
#include "locking.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* locks the per buffer mutex in an extent buffer. This uses adaptive locks
|
||||||
|
* and the spin is not tuned very extensively. The spinning does make a big
|
||||||
|
* difference in almost every workload, but spinning for the right amount of
|
||||||
|
* time needs some help.
|
||||||
|
*
|
||||||
|
* In general, we want to spin as long as the lock holder is doing btree searches,
|
||||||
|
* and we should give up if they are in more expensive code.
|
||||||
|
*/
|
||||||
int btrfs_tree_lock(struct extent_buffer *eb)
|
int btrfs_tree_lock(struct extent_buffer *eb)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
@ -57,6 +66,10 @@ int btrfs_tree_locked(struct extent_buffer *eb)
|
||||||
return mutex_is_locked(&eb->mutex);
|
return mutex_is_locked(&eb->mutex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* btrfs_search_slot uses this to decide if it should drop its locks
|
||||||
|
* before doing something expensive like allocating free blocks for cow.
|
||||||
|
*/
|
||||||
int btrfs_path_lock_waiting(struct btrfs_path *path, int level)
|
int btrfs_path_lock_waiting(struct btrfs_path *path, int level)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
|
@ -26,7 +26,6 @@
|
||||||
#include "btrfs_inode.h"
|
#include "btrfs_inode.h"
|
||||||
#include "extent_io.h"
|
#include "extent_io.h"
|
||||||
|
|
||||||
|
|
||||||
static u64 entry_end(struct btrfs_ordered_extent *entry)
|
static u64 entry_end(struct btrfs_ordered_extent *entry)
|
||||||
{
|
{
|
||||||
if (entry->file_offset + entry->len < entry->file_offset)
|
if (entry->file_offset + entry->len < entry->file_offset)
|
||||||
|
@ -34,6 +33,9 @@ static u64 entry_end(struct btrfs_ordered_extent *entry)
|
||||||
return entry->file_offset + entry->len;
|
return entry->file_offset + entry->len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* returns NULL if the insertion worked, or it returns the node it did find
|
||||||
|
* in the tree
|
||||||
|
*/
|
||||||
static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
|
static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
|
||||||
struct rb_node *node)
|
struct rb_node *node)
|
||||||
{
|
{
|
||||||
|
@ -58,6 +60,10 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* look for a given offset in the tree, and if it can't be found return the
|
||||||
|
* first lesser offset
|
||||||
|
*/
|
||||||
static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset,
|
static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset,
|
||||||
struct rb_node **prev_ret)
|
struct rb_node **prev_ret)
|
||||||
{
|
{
|
||||||
|
@ -108,6 +114,9 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* helper to check if a given offset is inside a given entry
|
||||||
|
*/
|
||||||
static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
|
static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
|
||||||
{
|
{
|
||||||
if (file_offset < entry->file_offset ||
|
if (file_offset < entry->file_offset ||
|
||||||
|
@ -116,6 +125,10 @@ static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* look find the first ordered struct that has this offset, otherwise
|
||||||
|
* the first one less than this offset
|
||||||
|
*/
|
||||||
static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
|
static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
|
||||||
u64 file_offset)
|
u64 file_offset)
|
||||||
{
|
{
|
||||||
|
@ -305,6 +318,10 @@ int btrfs_remove_ordered_extent(struct inode *inode,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* wait for all the ordered extents in a root. This is done when balancing
|
||||||
|
* space between drives.
|
||||||
|
*/
|
||||||
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
|
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
|
||||||
{
|
{
|
||||||
struct list_head splice;
|
struct list_head splice;
|
||||||
|
|
|
@ -21,6 +21,16 @@
|
||||||
#include "ref-cache.h"
|
#include "ref-cache.h"
|
||||||
#include "transaction.h"
|
#include "transaction.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* leaf refs are used to cache the information about which extents
|
||||||
|
* a given leaf has references on. This allows us to process that leaf
|
||||||
|
* in btrfs_drop_snapshot without needing to read it back from disk.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* kmalloc a leaf reference struct and update the counters for the
|
||||||
|
* total ref cache size
|
||||||
|
*/
|
||||||
struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root,
|
struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root,
|
||||||
int nr_extents)
|
int nr_extents)
|
||||||
{
|
{
|
||||||
|
@ -40,6 +50,10 @@ struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root,
|
||||||
return ref;
|
return ref;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* free a leaf reference struct and update the counters for the
|
||||||
|
* total ref cache size
|
||||||
|
*/
|
||||||
void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref)
|
void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref)
|
||||||
{
|
{
|
||||||
if (!ref)
|
if (!ref)
|
||||||
|
@ -135,6 +149,10 @@ int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* find the leaf ref for a given extent. This returns the ref struct with
|
||||||
|
* a usage reference incremented
|
||||||
|
*/
|
||||||
struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root,
|
struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root,
|
||||||
u64 bytenr)
|
u64 bytenr)
|
||||||
{
|
{
|
||||||
|
@ -160,6 +178,10 @@ struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* add a fully filled in leaf ref struct
|
||||||
|
* remove all the refs older than a given root generation
|
||||||
|
*/
|
||||||
int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref,
|
int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref,
|
||||||
int shared)
|
int shared)
|
||||||
{
|
{
|
||||||
|
@ -184,6 +206,10 @@ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* remove a single leaf ref from the tree. This drops the ref held by the tree
|
||||||
|
* only
|
||||||
|
*/
|
||||||
int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref)
|
int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref)
|
||||||
{
|
{
|
||||||
struct btrfs_leaf_ref_tree *tree;
|
struct btrfs_leaf_ref_tree *tree;
|
||||||
|
|
|
@ -19,8 +19,11 @@
|
||||||
#define __REFCACHE__
|
#define __REFCACHE__
|
||||||
|
|
||||||
struct btrfs_extent_info {
|
struct btrfs_extent_info {
|
||||||
|
/* bytenr and num_bytes find the extent in the extent allocation tree */
|
||||||
u64 bytenr;
|
u64 bytenr;
|
||||||
u64 num_bytes;
|
u64 num_bytes;
|
||||||
|
|
||||||
|
/* objectid and offset find the back reference for the file */
|
||||||
u64 objectid;
|
u64 objectid;
|
||||||
u64 offset;
|
u64 offset;
|
||||||
};
|
};
|
||||||
|
|
|
@ -22,8 +22,10 @@
|
||||||
#include "print-tree.h"
|
#include "print-tree.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* returns 0 on finding something, 1 if no more roots are there
|
* search forward for a root, starting with objectid 'search_start'
|
||||||
* and < 0 on error
|
* if a root key is found, the objectid we find is filled into 'found_objectid'
|
||||||
|
* and 0 is returned. < 0 is returned on error, 1 if there is nothing
|
||||||
|
* left in the tree.
|
||||||
*/
|
*/
|
||||||
int btrfs_search_root(struct btrfs_root *root, u64 search_start,
|
int btrfs_search_root(struct btrfs_root *root, u64 search_start,
|
||||||
u64 *found_objectid)
|
u64 *found_objectid)
|
||||||
|
@ -66,6 +68,11 @@ int btrfs_search_root(struct btrfs_root *root, u64 search_start,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* lookup the root with the highest offset for a given objectid. The key we do
|
||||||
|
* find is copied into 'key'. If we find something return 0, otherwise 1, < 0
|
||||||
|
* on error.
|
||||||
|
*/
|
||||||
int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
|
int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
|
||||||
struct btrfs_root_item *item, struct btrfs_key *key)
|
struct btrfs_root_item *item, struct btrfs_key *key)
|
||||||
{
|
{
|
||||||
|
@ -104,6 +111,9 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* copy the data in 'item' into the btree
|
||||||
|
*/
|
||||||
int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
|
int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
|
||||||
*root, struct btrfs_key *key, struct btrfs_root_item
|
*root, struct btrfs_key *key, struct btrfs_root_item
|
||||||
*item)
|
*item)
|
||||||
|
@ -147,6 +157,12 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* at mount time we want to find all the old transaction snapshots that were in
|
||||||
|
* the process of being deleted if we crashed. This is any root item with an offset
|
||||||
|
* lower than the latest root. They need to be queued for deletion to finish
|
||||||
|
* what was happening when we crashed.
|
||||||
|
*/
|
||||||
int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid,
|
int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid,
|
||||||
struct btrfs_root *latest)
|
struct btrfs_root *latest)
|
||||||
{
|
{
|
||||||
|
@ -227,6 +243,7 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* drop the root item for 'key' from 'root' */
|
||||||
int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||||
struct btrfs_key *key)
|
struct btrfs_key *key)
|
||||||
{
|
{
|
||||||
|
|
|
@ -17,6 +17,27 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <linux/highmem.h>
|
#include <linux/highmem.h>
|
||||||
|
|
||||||
|
/* this is some deeply nasty code. ctree.h has a different
|
||||||
|
* definition for this BTRFS_SETGET_FUNCS macro, behind a #ifndef
|
||||||
|
*
|
||||||
|
* The end result is that anyone who #includes ctree.h gets a
|
||||||
|
* declaration for the btrfs_set_foo functions and btrfs_foo functions
|
||||||
|
*
|
||||||
|
* This file declares the macros and then #includes ctree.h, which results
|
||||||
|
* in cpp creating the function here based on the template below.
|
||||||
|
*
|
||||||
|
* These setget functions do all the extent_buffer related mapping
|
||||||
|
* required to efficiently read and write specific fields in the extent
|
||||||
|
* buffers. Every pointer to metadata items in btrfs is really just
|
||||||
|
* an unsigned long offset into the extent buffer which has been
|
||||||
|
* cast to a specific type. This gives us all the gcc type checking.
|
||||||
|
*
|
||||||
|
* The extent buffer api is used to do all the kmapping and page
|
||||||
|
* spanning work required to get extent buffers in highmem and have
|
||||||
|
* a metadata blocksize different from the page size.
|
||||||
|
*/
|
||||||
|
|
||||||
#define BTRFS_SETGET_FUNCS(name, type, member, bits) \
|
#define BTRFS_SETGET_FUNCS(name, type, member, bits) \
|
||||||
u##bits btrfs_##name(struct extent_buffer *eb, \
|
u##bits btrfs_##name(struct extent_buffer *eb, \
|
||||||
type *s) \
|
type *s) \
|
||||||
|
|
|
@ -519,6 +519,9 @@ static struct file_system_type btrfs_fs_type = {
|
||||||
.fs_flags = FS_REQUIRES_DEV,
|
.fs_flags = FS_REQUIRES_DEV,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* used by btrfsctl to scan devices when no FS is mounted
|
||||||
|
*/
|
||||||
static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
|
static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
|
||||||
unsigned long arg)
|
unsigned long arg)
|
||||||
{
|
{
|
||||||
|
|
|
@ -46,6 +46,9 @@ static noinline void put_transaction(struct btrfs_transaction *transaction)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* either allocate a new transaction or hop into the existing one
|
||||||
|
*/
|
||||||
static noinline int join_transaction(struct btrfs_root *root)
|
static noinline int join_transaction(struct btrfs_root *root)
|
||||||
{
|
{
|
||||||
struct btrfs_transaction *cur_trans;
|
struct btrfs_transaction *cur_trans;
|
||||||
|
@ -85,6 +88,12 @@ static noinline int join_transaction(struct btrfs_root *root)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* this does all the record keeping required to make sure that a
|
||||||
|
* reference counted root is properly recorded in a given transaction.
|
||||||
|
* This is required to make sure the old root from before we joined the transaction
|
||||||
|
* is deleted when the transaction commits
|
||||||
|
*/
|
||||||
noinline int btrfs_record_root_in_trans(struct btrfs_root *root)
|
noinline int btrfs_record_root_in_trans(struct btrfs_root *root)
|
||||||
{
|
{
|
||||||
struct btrfs_dirty_root *dirty;
|
struct btrfs_dirty_root *dirty;
|
||||||
|
@ -127,6 +136,10 @@ noinline int btrfs_record_root_in_trans(struct btrfs_root *root)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* wait for commit against the current transaction to become unblocked
|
||||||
|
* when this is done, it is safe to start a new transaction, but the current
|
||||||
|
* transaction might not be fully on disk.
|
||||||
|
*/
|
||||||
static void wait_current_trans(struct btrfs_root *root)
|
static void wait_current_trans(struct btrfs_root *root)
|
||||||
{
|
{
|
||||||
struct btrfs_transaction *cur_trans;
|
struct btrfs_transaction *cur_trans;
|
||||||
|
@ -198,7 +211,7 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
|
||||||
return start_transaction(r, num_blocks, 2);
|
return start_transaction(r, num_blocks, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* wait for a transaction commit to be fully complete */
|
||||||
static noinline int wait_for_commit(struct btrfs_root *root,
|
static noinline int wait_for_commit(struct btrfs_root *root,
|
||||||
struct btrfs_transaction *commit)
|
struct btrfs_transaction *commit)
|
||||||
{
|
{
|
||||||
|
@ -218,6 +231,10 @@ static noinline int wait_for_commit(struct btrfs_root *root,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* rate limit against the drop_snapshot code. This helps to slow down new operations
|
||||||
|
* if the drop_snapshot code isn't able to keep up.
|
||||||
|
*/
|
||||||
static void throttle_on_drops(struct btrfs_root *root)
|
static void throttle_on_drops(struct btrfs_root *root)
|
||||||
{
|
{
|
||||||
struct btrfs_fs_info *info = root->fs_info;
|
struct btrfs_fs_info *info = root->fs_info;
|
||||||
|
@ -302,7 +319,11 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
|
||||||
return __btrfs_end_transaction(trans, root, 1);
|
return __btrfs_end_transaction(trans, root, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* when btree blocks are allocated, they have some corresponding bits set for
|
||||||
|
* them in one of two extent_io trees. This is used to make sure all of
|
||||||
|
* those extents are on disk for transaction or log commit
|
||||||
|
*/
|
||||||
int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
|
int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
|
||||||
struct extent_io_tree *dirty_pages)
|
struct extent_io_tree *dirty_pages)
|
||||||
{
|
{
|
||||||
|
@ -393,6 +414,16 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
|
||||||
&trans->transaction->dirty_pages);
|
&trans->transaction->dirty_pages);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* this is used to update the root pointer in the tree of tree roots.
|
||||||
|
*
|
||||||
|
* But, in the case of the extent allocation tree, updating the root
|
||||||
|
* pointer may allocate blocks which may change the root of the extent
|
||||||
|
* allocation tree.
|
||||||
|
*
|
||||||
|
* So, this loops and repeats and makes sure the cowonly root didn't
|
||||||
|
* change while the root pointer was being updated in the metadata.
|
||||||
|
*/
|
||||||
static int update_cowonly_root(struct btrfs_trans_handle *trans,
|
static int update_cowonly_root(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root)
|
struct btrfs_root *root)
|
||||||
{
|
{
|
||||||
|
@ -418,6 +449,9 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* update all the cowonly tree roots on disk
|
||||||
|
*/
|
||||||
int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
|
int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root)
|
struct btrfs_root *root)
|
||||||
{
|
{
|
||||||
|
@ -433,6 +467,11 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* dead roots are old snapshots that need to be deleted. This allocates
|
||||||
|
* a dirty root struct and adds it into the list of dead roots that need to
|
||||||
|
* be deleted
|
||||||
|
*/
|
||||||
int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest)
|
int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest)
|
||||||
{
|
{
|
||||||
struct btrfs_dirty_root *dirty;
|
struct btrfs_dirty_root *dirty;
|
||||||
|
@ -449,6 +488,12 @@ int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* at transaction commit time we need to schedule the old roots for
|
||||||
|
* deletion via btrfs_drop_snapshot. This runs through all the
|
||||||
|
* reference counted roots that were modified in the current
|
||||||
|
* transaction and puts them into the drop list
|
||||||
|
*/
|
||||||
static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
|
static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
|
||||||
struct radix_tree_root *radix,
|
struct radix_tree_root *radix,
|
||||||
struct list_head *list)
|
struct list_head *list)
|
||||||
|
@ -541,6 +586,10 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* defrag a given btree. If cacheonly == 1, this won't read from the disk,
|
||||||
|
* otherwise every leaf in the btree is read and defragged.
|
||||||
|
*/
|
||||||
int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
|
int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
|
||||||
{
|
{
|
||||||
struct btrfs_fs_info *info = root->fs_info;
|
struct btrfs_fs_info *info = root->fs_info;
|
||||||
|
@ -570,6 +619,10 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
|
||||||
|
* all of them
|
||||||
|
*/
|
||||||
static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
|
static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
|
||||||
struct list_head *list)
|
struct list_head *list)
|
||||||
{
|
{
|
||||||
|
@ -664,6 +717,10 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* new snapshots need to be created at a very specific time in the
|
||||||
|
* transaction commit. This does the actual creation
|
||||||
|
*/
|
||||||
static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_fs_info *fs_info,
|
struct btrfs_fs_info *fs_info,
|
||||||
struct btrfs_pending_snapshot *pending)
|
struct btrfs_pending_snapshot *pending)
|
||||||
|
@ -734,6 +791,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* create all the snapshots we've scheduled for creation
|
||||||
|
*/
|
||||||
static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
|
static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_fs_info *fs_info)
|
struct btrfs_fs_info *fs_info)
|
||||||
{
|
{
|
||||||
|
@ -944,6 +1004,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* interface function to delete all the snapshots we have scheduled for deletion
|
||||||
|
*/
|
||||||
int btrfs_clean_old_snapshots(struct btrfs_root *root)
|
int btrfs_clean_old_snapshots(struct btrfs_root *root)
|
||||||
{
|
{
|
||||||
struct list_head dirty_roots;
|
struct list_head dirty_roots;
|
||||||
|
|
|
@ -23,6 +23,10 @@
|
||||||
#include "transaction.h"
|
#include "transaction.h"
|
||||||
#include "locking.h"
|
#include "locking.h"
|
||||||
|
|
||||||
|
/* defrag all the leaves in a given btree. If cache_only == 1, don't read things
|
||||||
|
* from disk, otherwise read all the leaves and try to get key order to
|
||||||
|
* better reflect disk order
|
||||||
|
*/
|
||||||
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
|
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root, int cache_only)
|
struct btrfs_root *root, int cache_only)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue
Block a user