A very large number of cleanups and bug fixes --- in particular for
the ext4 encryption patches, which is a new feature added in the last
 merge window.  Also fix a number of long-standing xfstest failures.
 (Quota writes failing due to ENOSPC, a race between truncate and
 writepage in data=journalled mode that was causing generic/068 to
 fail, and other corner cases.)
 
 Also add support for FALLOC_FL_INSERT_RANGE, and improve jbd2
 performance by eliminating locking when a buffer is modified more than
 once during a transaction (which is very common for allocation
 bitmaps, for example), in which case the state of the journalled
 buffer head doesn't need to change.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 iQEcBAABCAAGBQJVi3PeAAoJEPL5WVaVDYGj+I0H/jRPexvyvnGfxiqs1sxIlbSk
 cwewFJSsuKsy/pGYdmHvozWZyWGGORc89NrxoNwdbG+axvHbgUWt/3+vF+rzmaek
 vX4v9QvCEo4PfpRgzbnYJFhbxGMJtwci887sq1o/UoNXikFYT2kz8rpdf0++eO5W
 /GJNRA5ZUY0L0eeloUILAMrBr7KjtkI2oXwOZt5q68jh7B3n3XdNQXyEiQS/28aK
 QYcFrqA/e2Fiuk6l5OSGBCP38mySu+x0nBTLT5LFwwrUBnoZvGtdjM6Sj/yADDDn
 uP/Zpq56aLzkFRwwItrDaF26BIf2MhIH/WUYs65CraEGxjMaiPuzAudGA/iUVL8=
 =1BdR
 -----END PGP SIGNATURE-----

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "A very large number of cleanups and bug fixes --- in particular for
  the ext4 encryption patches, which is a new feature added in the last
  merge window.  Also fix a number of long-standing xfstest failures.
  (Quota writes failing due to ENOSPC, a race between truncate and
  writepage in data=journalled mode that was causing generic/068 to
  fail, and other corner cases.)

  Also add support for FALLOC_FL_INSERT_RANGE, and improve jbd2
  performance by eliminating locking when a buffer is modified more than
  once during a transaction (which is very common for allocation
  bitmaps, for example), in which case the state of the journalled
  buffer head doesn't need to change"

[ I renamed "ext4_follow_link()" to "ext4_encrypted_follow_link()" in
  the merge resolution, to make it clear that that function is _only_
  used for encrypted symlinks.  The function doesn't actually work for
  non-encrypted symlinks at all, and they use the generic helpers.
                                         - Linus ]

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (52 commits)
  ext4: set lazytime on remount if MS_LAZYTIME is set by mount
  ext4: only call ext4_truncate when size <= isize
  ext4: make online defrag error reporting consistent
  ext4: minor cleanup of ext4_da_reserve_space()
  ext4: don't retry file block mapping on bigalloc fs with non-extent file
  ext4: prevent ext4_quota_write() from failing due to ENOSPC
  ext4: call sync_blockdev() before invalidate_bdev() in put_super()
  jbd2: speedup jbd2_journal_dirty_metadata()
  jbd2: get rid of open coded allocation retry loop
  ext4: improve warning directory handling messages
  jbd2: fix ocfs2 corrupt when updating journal superblock fails
  ext4: mballoc: avoid 20-argument function call
  ext4: wait for existing dio workers in ext4_alloc_file_blocks()
  ext4: recalculate journal credits as inode depth changes
  jbd2: use GFP_NOFS in jbd2_cleanup_journal_tail()
  ext4: use swap() in mext_page_double_lock()
  ext4: use swap() in memswap()
  ext4: fix race between truncate and __ext4_journalled_writepage()
  ext4 crypto: fail the mount if blocksize != pagesize
  ext4: Add support FALLOC_FL_INSERT_RANGE for fallocate
  ...
This commit is contained in:
Linus Torvalds 2015-06-25 14:06:55 -07:00
commit d857da7b70
29 changed files with 1498 additions and 1365 deletions

View File

@ -72,6 +72,7 @@ config EXT4_ENCRYPTION
select CRYPTO_ECB
select CRYPTO_XTS
select CRYPTO_CTS
select CRYPTO_CTR
select CRYPTO_SHA256
select KEYS
select ENCRYPTED_KEYS

View File

@ -369,7 +369,7 @@ static void ext4_validate_block_bitmap(struct super_block *sb,
struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
struct ext4_sb_info *sbi = EXT4_SB(sb);
if (buffer_verified(bh))
if (buffer_verified(bh) || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
return;
ext4_lock_group(sb, block_group);
@ -446,7 +446,7 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
unlock_buffer(bh);
if (err)
ext4_error(sb, "Checksum bad for grp %u", block_group);
return bh;
goto verify;
}
ext4_unlock_group(sb, block_group);
if (buffer_uptodate(bh)) {

View File

@ -55,6 +55,9 @@ static mempool_t *ext4_bounce_page_pool;
static LIST_HEAD(ext4_free_crypto_ctxs);
static DEFINE_SPINLOCK(ext4_crypto_ctx_lock);
static struct kmem_cache *ext4_crypto_ctx_cachep;
struct kmem_cache *ext4_crypt_info_cachep;
/**
* ext4_release_crypto_ctx() - Releases an encryption context
* @ctx: The encryption context to release.
@ -68,18 +71,12 @@ void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx)
{
unsigned long flags;
if (ctx->bounce_page) {
if (ctx->flags & EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL)
__free_page(ctx->bounce_page);
else
mempool_free(ctx->bounce_page, ext4_bounce_page_pool);
ctx->bounce_page = NULL;
}
ctx->control_page = NULL;
if (ctx->flags & EXT4_WRITE_PATH_FL && ctx->w.bounce_page)
mempool_free(ctx->w.bounce_page, ext4_bounce_page_pool);
ctx->w.bounce_page = NULL;
ctx->w.control_page = NULL;
if (ctx->flags & EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL) {
if (ctx->tfm)
crypto_free_tfm(ctx->tfm);
kfree(ctx);
kmem_cache_free(ext4_crypto_ctx_cachep, ctx);
} else {
spin_lock_irqsave(&ext4_crypto_ctx_lock, flags);
list_add(&ctx->free_list, &ext4_free_crypto_ctxs);
@ -87,23 +84,6 @@ void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx)
}
}
/**
* ext4_alloc_and_init_crypto_ctx() - Allocates and inits an encryption context
* @mask: The allocation mask.
*
* Return: An allocated and initialized encryption context on success. An error
* value or NULL otherwise.
*/
static struct ext4_crypto_ctx *ext4_alloc_and_init_crypto_ctx(gfp_t mask)
{
struct ext4_crypto_ctx *ctx = kzalloc(sizeof(struct ext4_crypto_ctx),
mask);
if (!ctx)
return ERR_PTR(-ENOMEM);
return ctx;
}
/**
* ext4_get_crypto_ctx() - Gets an encryption context
* @inode: The inode for which we are doing the crypto
@ -118,10 +98,10 @@ struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode)
struct ext4_crypto_ctx *ctx = NULL;
int res = 0;
unsigned long flags;
struct ext4_encryption_key *key = &EXT4_I(inode)->i_encryption_key;
struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
if (!ext4_read_workqueue)
ext4_init_crypto();
if (ci == NULL)
return ERR_PTR(-ENOKEY);
/*
* We first try getting the ctx from a free list because in
@ -140,50 +120,16 @@ struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode)
list_del(&ctx->free_list);
spin_unlock_irqrestore(&ext4_crypto_ctx_lock, flags);
if (!ctx) {
ctx = ext4_alloc_and_init_crypto_ctx(GFP_NOFS);
if (IS_ERR(ctx)) {
res = PTR_ERR(ctx);
ctx = kmem_cache_zalloc(ext4_crypto_ctx_cachep, GFP_NOFS);
if (!ctx) {
res = -ENOMEM;
goto out;
}
ctx->flags |= EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL;
} else {
ctx->flags &= ~EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL;
}
/* Allocate a new Crypto API context if we don't already have
* one or if it isn't the right mode. */
BUG_ON(key->mode == EXT4_ENCRYPTION_MODE_INVALID);
if (ctx->tfm && (ctx->mode != key->mode)) {
crypto_free_tfm(ctx->tfm);
ctx->tfm = NULL;
ctx->mode = EXT4_ENCRYPTION_MODE_INVALID;
}
if (!ctx->tfm) {
switch (key->mode) {
case EXT4_ENCRYPTION_MODE_AES_256_XTS:
ctx->tfm = crypto_ablkcipher_tfm(
crypto_alloc_ablkcipher("xts(aes)", 0, 0));
break;
case EXT4_ENCRYPTION_MODE_AES_256_GCM:
/* TODO(mhalcrow): AEAD w/ gcm(aes);
* crypto_aead_setauthsize() */
ctx->tfm = ERR_PTR(-ENOTSUPP);
break;
default:
BUG();
}
if (IS_ERR_OR_NULL(ctx->tfm)) {
res = PTR_ERR(ctx->tfm);
ctx->tfm = NULL;
goto out;
}
ctx->mode = key->mode;
}
BUG_ON(key->size != ext4_encryption_key_size(key->mode));
/* There shouldn't be a bounce page attached to the crypto
* context at this point. */
BUG_ON(ctx->bounce_page);
ctx->flags &= ~EXT4_WRITE_PATH_FL;
out:
if (res) {
@ -204,20 +150,8 @@ void ext4_exit_crypto(void)
{
struct ext4_crypto_ctx *pos, *n;
list_for_each_entry_safe(pos, n, &ext4_free_crypto_ctxs, free_list) {
if (pos->bounce_page) {
if (pos->flags &
EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL) {
__free_page(pos->bounce_page);
} else {
mempool_free(pos->bounce_page,
ext4_bounce_page_pool);
}
}
if (pos->tfm)
crypto_free_tfm(pos->tfm);
kfree(pos);
}
list_for_each_entry_safe(pos, n, &ext4_free_crypto_ctxs, free_list)
kmem_cache_free(ext4_crypto_ctx_cachep, pos);
INIT_LIST_HEAD(&ext4_free_crypto_ctxs);
if (ext4_bounce_page_pool)
mempool_destroy(ext4_bounce_page_pool);
@ -225,6 +159,12 @@ void ext4_exit_crypto(void)
if (ext4_read_workqueue)
destroy_workqueue(ext4_read_workqueue);
ext4_read_workqueue = NULL;
if (ext4_crypto_ctx_cachep)
kmem_cache_destroy(ext4_crypto_ctx_cachep);
ext4_crypto_ctx_cachep = NULL;
if (ext4_crypt_info_cachep)
kmem_cache_destroy(ext4_crypt_info_cachep);
ext4_crypt_info_cachep = NULL;
}
/**
@ -237,23 +177,31 @@ void ext4_exit_crypto(void)
*/
int ext4_init_crypto(void)
{
int i, res;
int i, res = -ENOMEM;
mutex_lock(&crypto_init);
if (ext4_read_workqueue)
goto already_initialized;
ext4_read_workqueue = alloc_workqueue("ext4_crypto", WQ_HIGHPRI, 0);
if (!ext4_read_workqueue) {
res = -ENOMEM;
if (!ext4_read_workqueue)
goto fail;
ext4_crypto_ctx_cachep = KMEM_CACHE(ext4_crypto_ctx,
SLAB_RECLAIM_ACCOUNT);
if (!ext4_crypto_ctx_cachep)
goto fail;
ext4_crypt_info_cachep = KMEM_CACHE(ext4_crypt_info,
SLAB_RECLAIM_ACCOUNT);
if (!ext4_crypt_info_cachep)
goto fail;
}
for (i = 0; i < num_prealloc_crypto_ctxs; i++) {
struct ext4_crypto_ctx *ctx;
ctx = ext4_alloc_and_init_crypto_ctx(GFP_KERNEL);
if (IS_ERR(ctx)) {
res = PTR_ERR(ctx);
ctx = kmem_cache_zalloc(ext4_crypto_ctx_cachep, GFP_NOFS);
if (!ctx) {
res = -ENOMEM;
goto fail;
}
list_add(&ctx->free_list, &ext4_free_crypto_ctxs);
@ -317,32 +265,11 @@ static int ext4_page_crypto(struct ext4_crypto_ctx *ctx,
struct ablkcipher_request *req = NULL;
DECLARE_EXT4_COMPLETION_RESULT(ecr);
struct scatterlist dst, src;
struct ext4_inode_info *ei = EXT4_I(inode);
struct crypto_ablkcipher *atfm = __crypto_ablkcipher_cast(ctx->tfm);
struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
struct crypto_ablkcipher *tfm = ci->ci_ctfm;
int res = 0;
BUG_ON(!ctx->tfm);
BUG_ON(ctx->mode != ei->i_encryption_key.mode);
if (ctx->mode != EXT4_ENCRYPTION_MODE_AES_256_XTS) {
printk_ratelimited(KERN_ERR
"%s: unsupported crypto algorithm: %d\n",
__func__, ctx->mode);
return -ENOTSUPP;
}
crypto_ablkcipher_clear_flags(atfm, ~0);
crypto_tfm_set_flags(ctx->tfm, CRYPTO_TFM_REQ_WEAK_KEY);
res = crypto_ablkcipher_setkey(atfm, ei->i_encryption_key.raw,
ei->i_encryption_key.size);
if (res) {
printk_ratelimited(KERN_ERR
"%s: crypto_ablkcipher_setkey() failed\n",
__func__);
return res;
}
req = ablkcipher_request_alloc(atfm, GFP_NOFS);
req = ablkcipher_request_alloc(tfm, GFP_NOFS);
if (!req) {
printk_ratelimited(KERN_ERR
"%s: crypto_request_alloc() failed\n",
@ -384,6 +311,15 @@ static int ext4_page_crypto(struct ext4_crypto_ctx *ctx,
return 0;
}
static struct page *alloc_bounce_page(struct ext4_crypto_ctx *ctx)
{
ctx->w.bounce_page = mempool_alloc(ext4_bounce_page_pool, GFP_NOWAIT);
if (ctx->w.bounce_page == NULL)
return ERR_PTR(-ENOMEM);
ctx->flags |= EXT4_WRITE_PATH_FL;
return ctx->w.bounce_page;
}
/**
* ext4_encrypt() - Encrypts a page
* @inode: The inode for which the encryption should take place
@ -413,27 +349,17 @@ struct page *ext4_encrypt(struct inode *inode,
return (struct page *) ctx;
/* The encryption operation will require a bounce page. */
ciphertext_page = alloc_page(GFP_NOFS);
if (!ciphertext_page) {
/* This is a potential bottleneck, but at least we'll have
* forward progress. */
ciphertext_page = mempool_alloc(ext4_bounce_page_pool,
GFP_NOFS);
if (WARN_ON_ONCE(!ciphertext_page)) {
ciphertext_page = mempool_alloc(ext4_bounce_page_pool,
GFP_NOFS | __GFP_WAIT);
}
ctx->flags &= ~EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
} else {
ctx->flags |= EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
}
ctx->bounce_page = ciphertext_page;
ctx->control_page = plaintext_page;
ciphertext_page = alloc_bounce_page(ctx);
if (IS_ERR(ciphertext_page))
goto errout;
ctx->w.control_page = plaintext_page;
err = ext4_page_crypto(ctx, inode, EXT4_ENCRYPT, plaintext_page->index,
plaintext_page, ciphertext_page);
if (err) {
ciphertext_page = ERR_PTR(err);
errout:
ext4_release_crypto_ctx(ctx);
return ERR_PTR(err);
return ciphertext_page;
}
SetPagePrivate(ciphertext_page);
set_page_private(ciphertext_page, (unsigned long)ctx);
@ -470,8 +396,8 @@ int ext4_decrypt_one(struct inode *inode, struct page *page)
struct ext4_crypto_ctx *ctx = ext4_get_crypto_ctx(inode);
if (!ctx)
return -ENOMEM;
if (IS_ERR(ctx))
return PTR_ERR(ctx);
ret = ext4_decrypt(ctx, page);
ext4_release_crypto_ctx(ctx);
return ret;
@ -493,21 +419,11 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
if (IS_ERR(ctx))
return PTR_ERR(ctx);
ciphertext_page = alloc_page(GFP_NOFS);
if (!ciphertext_page) {
/* This is a potential bottleneck, but at least we'll have
* forward progress. */
ciphertext_page = mempool_alloc(ext4_bounce_page_pool,
GFP_NOFS);
if (WARN_ON_ONCE(!ciphertext_page)) {
ciphertext_page = mempool_alloc(ext4_bounce_page_pool,
GFP_NOFS | __GFP_WAIT);
}
ctx->flags &= ~EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
} else {
ctx->flags |= EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
ciphertext_page = alloc_bounce_page(ctx);
if (IS_ERR(ciphertext_page)) {
err = PTR_ERR(ciphertext_page);
goto errout;
}
ctx->bounce_page = ciphertext_page;
while (len--) {
err = ext4_page_crypto(ctx, inode, EXT4_ENCRYPT, lblk,
@ -529,6 +445,7 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
goto errout;
}
err = submit_bio_wait(WRITE, bio);
bio_put(bio);
if (err)
goto errout;
}

View File

@ -48,6 +48,12 @@ bool ext4_valid_filenames_enc_mode(uint32_t mode)
return (mode == EXT4_ENCRYPTION_MODE_AES_256_CTS);
}
static unsigned max_name_len(struct inode *inode)
{
return S_ISLNK(inode->i_mode) ? inode->i_sb->s_blocksize :
EXT4_NAME_LEN;
}
/**
* ext4_fname_encrypt() -
*
@ -55,43 +61,52 @@ bool ext4_valid_filenames_enc_mode(uint32_t mode)
* ciphertext. Errors are returned as negative numbers. We trust the caller to
* allocate sufficient memory to oname string.
*/
static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx,
static int ext4_fname_encrypt(struct inode *inode,
const struct qstr *iname,
struct ext4_str *oname)
{
u32 ciphertext_len;
struct ablkcipher_request *req = NULL;
DECLARE_EXT4_COMPLETION_RESULT(ecr);
struct crypto_ablkcipher *tfm = ctx->ctfm;
struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
struct crypto_ablkcipher *tfm = ci->ci_ctfm;
int res = 0;
char iv[EXT4_CRYPTO_BLOCK_SIZE];
struct scatterlist sg[1];
int padding = 4 << (ctx->flags & EXT4_POLICY_FLAGS_PAD_MASK);
char *workbuf;
struct scatterlist src_sg, dst_sg;
int padding = 4 << (ci->ci_flags & EXT4_POLICY_FLAGS_PAD_MASK);
char *workbuf, buf[32], *alloc_buf = NULL;
unsigned lim = max_name_len(inode);
if (iname->len <= 0 || iname->len > ctx->lim)
if (iname->len <= 0 || iname->len > lim)
return -EIO;
ciphertext_len = (iname->len < EXT4_CRYPTO_BLOCK_SIZE) ?
EXT4_CRYPTO_BLOCK_SIZE : iname->len;
ciphertext_len = ext4_fname_crypto_round_up(ciphertext_len, padding);
ciphertext_len = (ciphertext_len > ctx->lim)
? ctx->lim : ciphertext_len;
ciphertext_len = (ciphertext_len > lim)
? lim : ciphertext_len;
if (ciphertext_len <= sizeof(buf)) {
workbuf = buf;
} else {
alloc_buf = kmalloc(ciphertext_len, GFP_NOFS);
if (!alloc_buf)
return -ENOMEM;
workbuf = alloc_buf;
}
/* Allocate request */
req = ablkcipher_request_alloc(tfm, GFP_NOFS);
if (!req) {
printk_ratelimited(
KERN_ERR "%s: crypto_request_alloc() failed\n", __func__);
kfree(alloc_buf);
return -ENOMEM;
}
ablkcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
ext4_dir_crypt_complete, &ecr);
/* Map the workpage */
workbuf = kmap(ctx->workpage);
/* Copy the input */
memcpy(workbuf, iname->name, iname->len);
if (iname->len < ciphertext_len)
@ -101,21 +116,16 @@ static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx,
memset(iv, 0, EXT4_CRYPTO_BLOCK_SIZE);
/* Create encryption request */
sg_init_table(sg, 1);
sg_set_page(sg, ctx->workpage, PAGE_SIZE, 0);
ablkcipher_request_set_crypt(req, sg, sg, ciphertext_len, iv);
sg_init_one(&src_sg, workbuf, ciphertext_len);
sg_init_one(&dst_sg, oname->name, ciphertext_len);
ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv);
res = crypto_ablkcipher_encrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
BUG_ON(req->base.data != &ecr);
wait_for_completion(&ecr.completion);
res = ecr.res;
}
if (res >= 0) {
/* Copy the result to output */
memcpy(oname->name, workbuf, ciphertext_len);
res = ciphertext_len;
}
kunmap(ctx->workpage);
kfree(alloc_buf);
ablkcipher_request_free(req);
if (res < 0) {
printk_ratelimited(
@ -132,20 +142,21 @@ static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx,
* Errors are returned as negative numbers.
* We trust the caller to allocate sufficient memory to oname string.
*/
static int ext4_fname_decrypt(struct ext4_fname_crypto_ctx *ctx,
static int ext4_fname_decrypt(struct inode *inode,
const struct ext4_str *iname,
struct ext4_str *oname)
{
struct ext4_str tmp_in[2], tmp_out[1];
struct ablkcipher_request *req = NULL;
DECLARE_EXT4_COMPLETION_RESULT(ecr);
struct scatterlist sg[1];
struct crypto_ablkcipher *tfm = ctx->ctfm;
struct scatterlist src_sg, dst_sg;
struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
struct crypto_ablkcipher *tfm = ci->ci_ctfm;
int res = 0;
char iv[EXT4_CRYPTO_BLOCK_SIZE];
char *workbuf;
unsigned lim = max_name_len(inode);
if (iname->len <= 0 || iname->len > ctx->lim)
if (iname->len <= 0 || iname->len > lim)
return -EIO;
tmp_in[0].name = iname->name;
@ -163,31 +174,19 @@ static int ext4_fname_decrypt(struct ext4_fname_crypto_ctx *ctx,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
ext4_dir_crypt_complete, &ecr);
/* Map the workpage */
workbuf = kmap(ctx->workpage);
/* Copy the input */
memcpy(workbuf, iname->name, iname->len);
/* Initialize IV */
memset(iv, 0, EXT4_CRYPTO_BLOCK_SIZE);
/* Create encryption request */
sg_init_table(sg, 1);
sg_set_page(sg, ctx->workpage, PAGE_SIZE, 0);
ablkcipher_request_set_crypt(req, sg, sg, iname->len, iv);
sg_init_one(&src_sg, iname->name, iname->len);
sg_init_one(&dst_sg, oname->name, oname->len);
ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv);
res = crypto_ablkcipher_decrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
BUG_ON(req->base.data != &ecr);
wait_for_completion(&ecr.completion);
res = ecr.res;
}
if (res >= 0) {
/* Copy the result to output */
memcpy(oname->name, workbuf, iname->len);
res = iname->len;
}
kunmap(ctx->workpage);
ablkcipher_request_free(req);
if (res < 0) {
printk_ratelimited(
@ -253,207 +252,6 @@ static int digest_decode(const char *src, int len, char *dst)
return cp - dst;
}
/**
* ext4_free_fname_crypto_ctx() -
*
* Frees up a crypto context.
*/
void ext4_free_fname_crypto_ctx(struct ext4_fname_crypto_ctx *ctx)
{
if (ctx == NULL || IS_ERR(ctx))
return;
if (ctx->ctfm && !IS_ERR(ctx->ctfm))
crypto_free_ablkcipher(ctx->ctfm);
if (ctx->htfm && !IS_ERR(ctx->htfm))
crypto_free_hash(ctx->htfm);
if (ctx->workpage && !IS_ERR(ctx->workpage))
__free_page(ctx->workpage);
kfree(ctx);
}
/**
* ext4_put_fname_crypto_ctx() -
*
* Return: The crypto context onto free list. If the free list is above a
* threshold, completely frees up the context, and returns the memory.
*
* TODO: Currently we directly free the crypto context. Eventually we should
* add code it to return to free list. Such an approach will increase
* efficiency of directory lookup.
*/
void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx)
{
if (*ctx == NULL || IS_ERR(*ctx))
return;
ext4_free_fname_crypto_ctx(*ctx);
*ctx = NULL;
}
/**
* ext4_search_fname_crypto_ctx() -
*/
static struct ext4_fname_crypto_ctx *ext4_search_fname_crypto_ctx(
const struct ext4_encryption_key *key)
{
return NULL;
}
/**
* ext4_alloc_fname_crypto_ctx() -
*/
struct ext4_fname_crypto_ctx *ext4_alloc_fname_crypto_ctx(
const struct ext4_encryption_key *key)
{
struct ext4_fname_crypto_ctx *ctx;
ctx = kmalloc(sizeof(struct ext4_fname_crypto_ctx), GFP_NOFS);
if (ctx == NULL)
return ERR_PTR(-ENOMEM);
if (key->mode == EXT4_ENCRYPTION_MODE_INVALID) {
/* This will automatically set key mode to invalid
* As enum for ENCRYPTION_MODE_INVALID is zero */
memset(&ctx->key, 0, sizeof(ctx->key));
} else {
memcpy(&ctx->key, key, sizeof(struct ext4_encryption_key));
}
ctx->has_valid_key = (EXT4_ENCRYPTION_MODE_INVALID == key->mode)
? 0 : 1;
ctx->ctfm_key_is_ready = 0;
ctx->ctfm = NULL;
ctx->htfm = NULL;
ctx->workpage = NULL;
return ctx;
}
/**
* ext4_get_fname_crypto_ctx() -
*
* Allocates a free crypto context and initializes it to hold
* the crypto material for the inode.
*
* Return: NULL if not encrypted. Error value on error. Valid pointer otherwise.
*/
struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(
struct inode *inode, u32 max_ciphertext_len)
{
struct ext4_fname_crypto_ctx *ctx;
struct ext4_inode_info *ei = EXT4_I(inode);
int res;
/* Check if the crypto policy is set on the inode */
res = ext4_encrypted_inode(inode);
if (res == 0)
return NULL;
if (!ext4_has_encryption_key(inode))
ext4_generate_encryption_key(inode);
/* Get a crypto context based on the key.
* A new context is allocated if no context matches the requested key.
*/
ctx = ext4_search_fname_crypto_ctx(&(ei->i_encryption_key));
if (ctx == NULL)
ctx = ext4_alloc_fname_crypto_ctx(&(ei->i_encryption_key));
if (IS_ERR(ctx))
return ctx;
ctx->flags = ei->i_crypt_policy_flags;
if (ctx->has_valid_key) {
if (ctx->key.mode != EXT4_ENCRYPTION_MODE_AES_256_CTS) {
printk_once(KERN_WARNING
"ext4: unsupported key mode %d\n",
ctx->key.mode);
return ERR_PTR(-ENOKEY);
}
/* As a first cut, we will allocate new tfm in every call.
* later, we will keep the tfm around, in case the key gets
* re-used */
if (ctx->ctfm == NULL) {
ctx->ctfm = crypto_alloc_ablkcipher("cts(cbc(aes))",
0, 0);
}
if (IS_ERR(ctx->ctfm)) {
res = PTR_ERR(ctx->ctfm);
printk(
KERN_DEBUG "%s: error (%d) allocating crypto tfm\n",
__func__, res);
ctx->ctfm = NULL;
ext4_put_fname_crypto_ctx(&ctx);
return ERR_PTR(res);
}
if (ctx->ctfm == NULL) {
printk(
KERN_DEBUG "%s: could not allocate crypto tfm\n",
__func__);
ext4_put_fname_crypto_ctx(&ctx);
return ERR_PTR(-ENOMEM);
}
if (ctx->workpage == NULL)
ctx->workpage = alloc_page(GFP_NOFS);
if (IS_ERR(ctx->workpage)) {
res = PTR_ERR(ctx->workpage);
printk(
KERN_DEBUG "%s: error (%d) allocating work page\n",
__func__, res);
ctx->workpage = NULL;
ext4_put_fname_crypto_ctx(&ctx);
return ERR_PTR(res);
}
if (ctx->workpage == NULL) {
printk(
KERN_DEBUG "%s: could not allocate work page\n",
__func__);
ext4_put_fname_crypto_ctx(&ctx);
return ERR_PTR(-ENOMEM);
}
ctx->lim = max_ciphertext_len;
crypto_ablkcipher_clear_flags(ctx->ctfm, ~0);
crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctx->ctfm),
CRYPTO_TFM_REQ_WEAK_KEY);
/* If we are lucky, we will get a context that is already
* set up with the right key. Else, we will have to
* set the key */
if (!ctx->ctfm_key_is_ready) {
/* Since our crypto objectives for filename encryption
* are pretty weak,
* we directly use the inode master key */
res = crypto_ablkcipher_setkey(ctx->ctfm,
ctx->key.raw, ctx->key.size);
if (res) {
ext4_put_fname_crypto_ctx(&ctx);
return ERR_PTR(-EIO);
}
ctx->ctfm_key_is_ready = 1;
} else {
/* In the current implementation, key should never be
* marked "ready" for a context that has just been
* allocated. So we should never reach here */
BUG();
}
}
if (ctx->htfm == NULL)
ctx->htfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(ctx->htfm)) {
res = PTR_ERR(ctx->htfm);
printk(KERN_DEBUG "%s: error (%d) allocating hash tfm\n",
__func__, res);
ctx->htfm = NULL;
ext4_put_fname_crypto_ctx(&ctx);
return ERR_PTR(res);
}
if (ctx->htfm == NULL) {
printk(KERN_DEBUG "%s: could not allocate hash tfm\n",
__func__);
ext4_put_fname_crypto_ctx(&ctx);
return ERR_PTR(-ENOMEM);
}
return ctx;
}
/**
* ext4_fname_crypto_round_up() -
*
@ -464,44 +262,29 @@ u32 ext4_fname_crypto_round_up(u32 size, u32 blksize)
return ((size+blksize-1)/blksize)*blksize;
}
/**
* ext4_fname_crypto_namelen_on_disk() -
*/
int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx,
u32 namelen)
unsigned ext4_fname_encrypted_size(struct inode *inode, u32 ilen)
{
u32 ciphertext_len;
int padding = 4 << (ctx->flags & EXT4_POLICY_FLAGS_PAD_MASK);
struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
int padding = 32;
if (ctx == NULL)
return -EIO;
if (!(ctx->has_valid_key))
return -EACCES;
ciphertext_len = (namelen < EXT4_CRYPTO_BLOCK_SIZE) ?
EXT4_CRYPTO_BLOCK_SIZE : namelen;
ciphertext_len = ext4_fname_crypto_round_up(ciphertext_len, padding);
ciphertext_len = (ciphertext_len > ctx->lim)
? ctx->lim : ciphertext_len;
return (int) ciphertext_len;
if (ci)
padding = 4 << (ci->ci_flags & EXT4_POLICY_FLAGS_PAD_MASK);
if (ilen < EXT4_CRYPTO_BLOCK_SIZE)
ilen = EXT4_CRYPTO_BLOCK_SIZE;
return ext4_fname_crypto_round_up(ilen, padding);
}
/**
* ext4_fname_crypto_alloc_obuff() -
/*
* ext4_fname_crypto_alloc_buffer() -
*
* Allocates an output buffer that is sufficient for the crypto operation
* specified by the context and the direction.
*/
int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx,
int ext4_fname_crypto_alloc_buffer(struct inode *inode,
u32 ilen, struct ext4_str *crypto_str)
{
unsigned int olen;
int padding = 4 << (ctx->flags & EXT4_POLICY_FLAGS_PAD_MASK);
unsigned int olen = ext4_fname_encrypted_size(inode, ilen);
if (!ctx)
return -EIO;
if (padding < EXT4_CRYPTO_BLOCK_SIZE)
padding = EXT4_CRYPTO_BLOCK_SIZE;
olen = ext4_fname_crypto_round_up(ilen, padding);
crypto_str->len = olen;
if (olen < EXT4_FNAME_CRYPTO_DIGEST_SIZE*2)
olen = EXT4_FNAME_CRYPTO_DIGEST_SIZE*2;
@ -529,7 +312,7 @@ void ext4_fname_crypto_free_buffer(struct ext4_str *crypto_str)
/**
* ext4_fname_disk_to_usr() - converts a filename from disk space to user space
*/
int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
int _ext4_fname_disk_to_usr(struct inode *inode,
struct dx_hash_info *hinfo,
const struct ext4_str *iname,
struct ext4_str *oname)
@ -537,8 +320,6 @@ int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
char buf[24];
int ret;
if (ctx == NULL)
return -EIO;
if (iname->len < 3) {
/*Check for . and .. */
if (iname->name[0] == '.' && iname->name[iname->len-1] == '.') {
@ -548,8 +329,8 @@ int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
return oname->len;
}
}
if (ctx->has_valid_key)
return ext4_fname_decrypt(ctx, iname, oname);
if (EXT4_I(inode)->i_crypt_info)
return ext4_fname_decrypt(inode, iname, oname);
if (iname->len <= EXT4_FNAME_CRYPTO_DIGEST_SIZE) {
ret = digest_encode(iname->name, iname->len, oname->name);
@ -568,7 +349,7 @@ int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
return ret + 1;
}
int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
int ext4_fname_disk_to_usr(struct inode *inode,
struct dx_hash_info *hinfo,
const struct ext4_dir_entry_2 *de,
struct ext4_str *oname)
@ -576,21 +357,20 @@ int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
struct ext4_str iname = {.name = (unsigned char *) de->name,
.len = de->name_len };
return _ext4_fname_disk_to_usr(ctx, hinfo, &iname, oname);
return _ext4_fname_disk_to_usr(inode, hinfo, &iname, oname);
}
/**
* ext4_fname_usr_to_disk() - converts a filename from user space to disk space
*/
int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx,
int ext4_fname_usr_to_disk(struct inode *inode,
const struct qstr *iname,
struct ext4_str *oname)
{
int res;
struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
if (ctx == NULL)
return -EIO;
if (iname->len < 3) {
/*Check for . and .. */
if (iname->name[0] == '.' &&
@ -601,8 +381,8 @@ int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx,
return oname->len;
}
}
if (ctx->has_valid_key) {
res = ext4_fname_encrypt(ctx, iname, oname);
if (ci) {
res = ext4_fname_encrypt(inode, iname, oname);
return res;
}
/* Without a proper key, a user is not allowed to modify the filenames
@ -611,109 +391,79 @@ int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx,
return -EACCES;
}
/*
* Calculate the htree hash from a filename from user space
*/
int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx,
const struct qstr *iname,
struct dx_hash_info *hinfo)
int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname,
int lookup, struct ext4_filename *fname)
{
struct ext4_str tmp;
int ret = 0;
char buf[EXT4_FNAME_CRYPTO_DIGEST_SIZE+1];
struct ext4_crypt_info *ci;
int ret = 0, bigname = 0;
if (!ctx ||
memset(fname, 0, sizeof(struct ext4_filename));
fname->usr_fname = iname;
if (!ext4_encrypted_inode(dir) ||
((iname->name[0] == '.') &&
((iname->len == 1) ||
((iname->name[1] == '.') && (iname->len == 2))))) {
ext4fs_dirhash(iname->name, iname->len, hinfo);
fname->disk_name.name = (unsigned char *) iname->name;
fname->disk_name.len = iname->len;
return 0;
}
if (!ctx->has_valid_key && iname->name[0] == '_') {
if (iname->len != 33)
return -ENOENT;
ret = digest_decode(iname->name+1, iname->len, buf);
if (ret != 24)
return -ENOENT;
memcpy(&hinfo->hash, buf, 4);
memcpy(&hinfo->minor_hash, buf + 4, 4);
return 0;
}
if (!ctx->has_valid_key && iname->name[0] != '_') {
if (iname->len > 43)
return -ENOENT;
ret = digest_decode(iname->name, iname->len, buf);
ext4fs_dirhash(buf, ret, hinfo);
return 0;
}
/* First encrypt the plaintext name */
ret = ext4_fname_crypto_alloc_buffer(ctx, iname->len, &tmp);
if (ret < 0)
ret = ext4_get_encryption_info(dir);
if (ret)
return ret;
ret = ext4_fname_encrypt(ctx, iname, &tmp);
if (ret >= 0) {
ext4fs_dirhash(tmp.name, tmp.len, hinfo);
ret = 0;
}
ext4_fname_crypto_free_buffer(&tmp);
return ret;
}
int ext4_fname_match(struct ext4_fname_crypto_ctx *ctx, struct ext4_str *cstr,
int len, const char * const name,
struct ext4_dir_entry_2 *de)
{
int ret = -ENOENT;
int bigname = (*name == '_');
if (ctx->has_valid_key) {
if (cstr->name == NULL) {
struct qstr istr;
ret = ext4_fname_crypto_alloc_buffer(ctx, len, cstr);
if (ret < 0)
goto errout;
istr.name = name;
istr.len = len;
ret = ext4_fname_encrypt(ctx, &istr, cstr);
if (ret < 0)
goto errout;
}
} else {
if (cstr->name == NULL) {
cstr->name = kmalloc(32, GFP_KERNEL);
if (cstr->name == NULL)
return -ENOMEM;
if ((bigname && (len != 33)) ||
(!bigname && (len > 43)))
goto errout;
ret = digest_decode(name+bigname, len-bigname,
cstr->name);
if (ret < 0) {
ret = -ENOENT;
goto errout;
}
cstr->len = ret;
}
if (bigname) {
if (de->name_len < 16)
return 0;
ret = memcmp(de->name + de->name_len - 16,
cstr->name + 8, 16);
return (ret == 0) ? 1 : 0;
}
}
if (de->name_len != cstr->len)
ci = EXT4_I(dir)->i_crypt_info;
if (ci) {
ret = ext4_fname_crypto_alloc_buffer(dir, iname->len,
&fname->crypto_buf);
if (ret < 0)
return ret;
ret = ext4_fname_encrypt(dir, iname, &fname->crypto_buf);
if (ret < 0)
goto errout;
fname->disk_name.name = fname->crypto_buf.name;
fname->disk_name.len = fname->crypto_buf.len;
return 0;
ret = memcmp(de->name, cstr->name, cstr->len);
return (ret == 0) ? 1 : 0;
}
if (!lookup)
return -EACCES;
/* We don't have the key and we are doing a lookup; decode the
* user-supplied name
*/
if (iname->name[0] == '_')
bigname = 1;
if ((bigname && (iname->len != 33)) ||
(!bigname && (iname->len > 43)))
return -ENOENT;
fname->crypto_buf.name = kmalloc(32, GFP_KERNEL);
if (fname->crypto_buf.name == NULL)
return -ENOMEM;
ret = digest_decode(iname->name + bigname, iname->len - bigname,
fname->crypto_buf.name);
if (ret < 0) {
ret = -ENOENT;
goto errout;
}
fname->crypto_buf.len = ret;
if (bigname) {
memcpy(&fname->hinfo.hash, fname->crypto_buf.name, 4);
memcpy(&fname->hinfo.minor_hash, fname->crypto_buf.name + 4, 4);
} else {
fname->disk_name.name = fname->crypto_buf.name;
fname->disk_name.len = fname->crypto_buf.len;
}
return 0;
errout:
kfree(cstr->name);
cstr->name = NULL;
kfree(fname->crypto_buf.name);
fname->crypto_buf.name = NULL;
return ret;
}
void ext4_fname_free_filename(struct ext4_filename *fname)
{
kfree(fname->crypto_buf.name);
fname->crypto_buf.name = NULL;
fname->usr_fname = NULL;
fname->disk_name.name = NULL;
}

View File

@ -84,14 +84,38 @@ static int ext4_derive_key_aes(char deriving_key[EXT4_AES_128_ECB_KEY_SIZE],
return res;
}
/**
* ext4_generate_encryption_key() - generates an encryption key
* @inode: The inode to generate the encryption key for.
*/
int ext4_generate_encryption_key(struct inode *inode)
void ext4_free_crypt_info(struct ext4_crypt_info *ci)
{
if (!ci)
return;
if (ci->ci_keyring_key)
key_put(ci->ci_keyring_key);
crypto_free_ablkcipher(ci->ci_ctfm);
kmem_cache_free(ext4_crypt_info_cachep, ci);
}
/*
 * ext4_free_encryption_info() - detach and free an inode's crypt info
 * @inode: inode whose i_crypt_info is to be released
 * @ci:    crypt info the caller expects to be installed, or NULL to release
 *         whatever i_crypt_info currently points to
 *
 * The pointer is detached with cmpxchg().  If i_crypt_info does not hold
 * @ci at that point, nothing is freed by this call.
 */
void ext4_free_encryption_info(struct inode *inode,
			       struct ext4_crypt_info *ci)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_crypt_info *prev;

	if (ci == NULL)
		ci = ACCESS_ONCE(ei->i_crypt_info);
	if (ci == NULL)
		return;

	/* Free only if this call is the one that swapped @ci out. */
	prev = cmpxchg(&ei->i_crypt_info, ci, NULL);
	if (prev != ci)
		return;

	ext4_free_crypt_info(ci);
}
int _ext4_get_encryption_info(struct inode *inode)
{
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_crypt_info *crypt_info;
char full_key_descriptor[EXT4_KEY_DESC_PREFIX_SIZE +
(EXT4_KEY_DESCRIPTOR_SIZE * 2) + 1];
struct key *keyring_key = NULL;
@ -99,32 +123,77 @@ int ext4_generate_encryption_key(struct inode *inode)
struct ext4_encryption_context ctx;
struct user_key_payload *ukp;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
int res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
struct crypto_ablkcipher *ctfm;
const char *cipher_str;
char raw_key[EXT4_MAX_KEY_SIZE];
char mode;
int res;
if (!ext4_read_workqueue) {
res = ext4_init_crypto();
if (res)
return res;
}
retry:
crypt_info = ACCESS_ONCE(ei->i_crypt_info);
if (crypt_info) {
if (!crypt_info->ci_keyring_key ||
key_validate(crypt_info->ci_keyring_key) == 0)
return 0;
ext4_free_encryption_info(inode, crypt_info);
goto retry;
}
res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
&ctx, sizeof(ctx));
if (res != sizeof(ctx)) {
if (res > 0)
res = -EINVAL;
goto out;
}
if (res < 0) {
if (!DUMMY_ENCRYPTION_ENABLED(sbi))
return res;
ctx.contents_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS;
ctx.filenames_encryption_mode =
EXT4_ENCRYPTION_MODE_AES_256_CTS;
ctx.flags = 0;
} else if (res != sizeof(ctx))
return -EINVAL;
res = 0;
ei->i_crypt_policy_flags = ctx.flags;
crypt_info = kmem_cache_alloc(ext4_crypt_info_cachep, GFP_KERNEL);
if (!crypt_info)
return -ENOMEM;
crypt_info->ci_flags = ctx.flags;
crypt_info->ci_data_mode = ctx.contents_encryption_mode;
crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
crypt_info->ci_ctfm = NULL;
crypt_info->ci_keyring_key = NULL;
memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
sizeof(crypt_info->ci_master_key));
if (S_ISREG(inode->i_mode))
crypt_key->mode = ctx.contents_encryption_mode;
mode = crypt_info->ci_data_mode;
else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
crypt_key->mode = ctx.filenames_encryption_mode;
else {
printk(KERN_ERR "ext4 crypto: Unsupported inode type.\n");
mode = crypt_info->ci_filename_mode;
else
BUG();
}
crypt_key->size = ext4_encryption_key_size(crypt_key->mode);
BUG_ON(!crypt_key->size);
if (DUMMY_ENCRYPTION_ENABLED(sbi)) {
memset(crypt_key->raw, 0x42, EXT4_AES_256_XTS_KEY_SIZE);
switch (mode) {
case EXT4_ENCRYPTION_MODE_AES_256_XTS:
cipher_str = "xts(aes)";
break;
case EXT4_ENCRYPTION_MODE_AES_256_CTS:
cipher_str = "cts(cbc(aes))";
break;
default:
printk_once(KERN_WARNING
"ext4: unsupported key mode %d (ino %u)\n",
mode, (unsigned) inode->i_ino);
res = -ENOKEY;
goto out;
}
if (DUMMY_ENCRYPTION_ENABLED(sbi)) {
memset(raw_key, 0x42, EXT4_AES_256_XTS_KEY_SIZE);
goto got_key;
}
memcpy(full_key_descriptor, EXT4_KEY_DESC_PREFIX,
EXT4_KEY_DESC_PREFIX_SIZE);
sprintf(full_key_descriptor + EXT4_KEY_DESC_PREFIX_SIZE,
@ -138,6 +207,7 @@ int ext4_generate_encryption_key(struct inode *inode)
keyring_key = NULL;
goto out;
}
crypt_info->ci_keyring_key = keyring_key;
BUG_ON(keyring_key->type != &key_type_logon);
ukp = ((struct user_key_payload *)keyring_key->payload.data);
if (ukp->datalen != sizeof(struct ext4_encryption_key)) {
@ -148,19 +218,43 @@ int ext4_generate_encryption_key(struct inode *inode)
BUILD_BUG_ON(EXT4_AES_128_ECB_KEY_SIZE !=
EXT4_KEY_DERIVATION_NONCE_SIZE);
BUG_ON(master_key->size != EXT4_AES_256_XTS_KEY_SIZE);
res = ext4_derive_key_aes(ctx.nonce, master_key->raw, crypt_key->raw);
res = ext4_derive_key_aes(ctx.nonce, master_key->raw,
raw_key);
got_key:
ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0);
if (!ctfm || IS_ERR(ctfm)) {
res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
printk(KERN_DEBUG
"%s: error %d (inode %u) allocating crypto tfm\n",
__func__, res, (unsigned) inode->i_ino);
goto out;
}
crypt_info->ci_ctfm = ctfm;
crypto_ablkcipher_clear_flags(ctfm, ~0);
crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm),
CRYPTO_TFM_REQ_WEAK_KEY);
res = crypto_ablkcipher_setkey(ctfm, raw_key,
ext4_encryption_key_size(mode));
if (res)
goto out;
memzero_explicit(raw_key, sizeof(raw_key));
if (cmpxchg(&ei->i_crypt_info, NULL, crypt_info) != NULL) {
ext4_free_crypt_info(crypt_info);
goto retry;
}
return 0;
out:
if (keyring_key)
key_put(keyring_key);
if (res < 0)
crypt_key->mode = EXT4_ENCRYPTION_MODE_INVALID;
if (res == -ENOKEY)
res = 0;
ext4_free_crypt_info(crypt_info);
memzero_explicit(raw_key, sizeof(raw_key));
return res;
}
/*
 * ext4_has_encryption_key() - report whether crypt info is attached
 * @inode: inode to query
 *
 * Returns nonzero when the inode's i_crypt_info pointer is set and 0
 * otherwise.
 */
int ext4_has_encryption_key(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);

	return (ei->i_crypt_info != NULL);
}

View File

@ -51,6 +51,10 @@ static int ext4_create_encryption_context_from_policy(
struct ext4_encryption_context ctx;
int res = 0;
res = ext4_convert_inline_data(inode);
if (res)
return res;
ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1;
memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
EXT4_KEY_DESCRIPTOR_SIZE);
@ -89,6 +93,8 @@ int ext4_process_policy(const struct ext4_encryption_policy *policy,
return -EINVAL;
if (!ext4_inode_has_encryption_context(inode)) {
if (!S_ISDIR(inode->i_mode))
return -EINVAL;
if (!ext4_empty_dir(inode))
return -ENOTEMPTY;
return ext4_create_encryption_context_from_policy(inode,
@ -126,7 +132,7 @@ int ext4_get_policy(struct inode *inode, struct ext4_encryption_policy *policy)
int ext4_is_child_context_consistent_with_parent(struct inode *parent,
struct inode *child)
{
struct ext4_encryption_context parent_ctx, child_ctx;
struct ext4_crypt_info *parent_ci, *child_ci;
int res;
if ((parent == NULL) || (child == NULL)) {
@ -136,26 +142,28 @@ int ext4_is_child_context_consistent_with_parent(struct inode *parent,
/* no restrictions if the parent directory is not encrypted */
if (!ext4_encrypted_inode(parent))
return 1;
res = ext4_xattr_get(parent, EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
&parent_ctx, sizeof(parent_ctx));
if (res != sizeof(parent_ctx))
return 0;
/* if the child directory is not encrypted, this is always a problem */
if (!ext4_encrypted_inode(child))
return 0;
res = ext4_xattr_get(child, EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
&child_ctx, sizeof(child_ctx));
if (res != sizeof(child_ctx))
res = ext4_get_encryption_info(parent);
if (res)
return 0;
return (memcmp(parent_ctx.master_key_descriptor,
child_ctx.master_key_descriptor,
res = ext4_get_encryption_info(child);
if (res)
return 0;
parent_ci = EXT4_I(parent)->i_crypt_info;
child_ci = EXT4_I(child)->i_crypt_info;
if (!parent_ci && !child_ci)
return 1;
if (!parent_ci || !child_ci)
return 0;
return (memcmp(parent_ci->ci_master_key,
child_ci->ci_master_key,
EXT4_KEY_DESCRIPTOR_SIZE) == 0 &&
(parent_ctx.contents_encryption_mode ==
child_ctx.contents_encryption_mode) &&
(parent_ctx.filenames_encryption_mode ==
child_ctx.filenames_encryption_mode));
(parent_ci->ci_data_mode == child_ci->ci_data_mode) &&
(parent_ci->ci_filename_mode == child_ci->ci_filename_mode) &&
(parent_ci->ci_flags == child_ci->ci_flags));
}
/**
@ -168,31 +176,40 @@ int ext4_is_child_context_consistent_with_parent(struct inode *parent,
int ext4_inherit_context(struct inode *parent, struct inode *child)
{
struct ext4_encryption_context ctx;
int res = ext4_xattr_get(parent, EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
&ctx, sizeof(ctx));
struct ext4_crypt_info *ci;
int res;
if (res != sizeof(ctx)) {
if (DUMMY_ENCRYPTION_ENABLED(EXT4_SB(parent->i_sb))) {
ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1;
ctx.contents_encryption_mode =
EXT4_ENCRYPTION_MODE_AES_256_XTS;
ctx.filenames_encryption_mode =
EXT4_ENCRYPTION_MODE_AES_256_CTS;
ctx.flags = 0;
memset(ctx.master_key_descriptor, 0x42,
EXT4_KEY_DESCRIPTOR_SIZE);
res = 0;
} else {
goto out;
}
res = ext4_get_encryption_info(parent);
if (res < 0)
return res;
ci = EXT4_I(parent)->i_crypt_info;
if (ci == NULL)
return -ENOKEY;
ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1;
if (DUMMY_ENCRYPTION_ENABLED(EXT4_SB(parent->i_sb))) {
ctx.contents_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS;
ctx.filenames_encryption_mode =
EXT4_ENCRYPTION_MODE_AES_256_CTS;
ctx.flags = 0;
memset(ctx.master_key_descriptor, 0x42,
EXT4_KEY_DESCRIPTOR_SIZE);
res = 0;
} else {
ctx.contents_encryption_mode = ci->ci_data_mode;
ctx.filenames_encryption_mode = ci->ci_filename_mode;
ctx.flags = ci->ci_flags;
memcpy(ctx.master_key_descriptor, ci->ci_master_key,
EXT4_KEY_DESCRIPTOR_SIZE);
}
get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE);
res = ext4_xattr_set(child, EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
sizeof(ctx), 0);
out:
if (!res)
if (!res) {
ext4_set_inode_flag(child, EXT4_INODE_ENCRYPT);
ext4_clear_inode_state(child, EXT4_STATE_MAY_INLINE_DATA);
res = ext4_get_encryption_info(child);
}
return res;
}

View File

@ -110,7 +110,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
struct super_block *sb = inode->i_sb;
struct buffer_head *bh = NULL;
int dir_has_error = 0;
struct ext4_fname_crypto_ctx *enc_ctx = NULL;
struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
if (is_dx_dir(inode)) {
@ -134,16 +133,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
return err;
}
enc_ctx = ext4_get_fname_crypto_ctx(inode, EXT4_NAME_LEN);
if (IS_ERR(enc_ctx))
return PTR_ERR(enc_ctx);
if (enc_ctx) {
err = ext4_fname_crypto_alloc_buffer(enc_ctx, EXT4_NAME_LEN,
if (ext4_encrypted_inode(inode)) {
err = ext4_fname_crypto_alloc_buffer(inode, EXT4_NAME_LEN,
&fname_crypto_str);
if (err < 0) {
ext4_put_fname_crypto_ctx(&enc_ctx);
if (err < 0)
return err;
}
}
offset = ctx->pos & (sb->s_blocksize - 1);
@ -239,17 +233,19 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
offset += ext4_rec_len_from_disk(de->rec_len,
sb->s_blocksize);
if (le32_to_cpu(de->inode)) {
if (enc_ctx == NULL) {
/* Directory is not encrypted */
if (!ext4_encrypted_inode(inode)) {
if (!dir_emit(ctx, de->name,
de->name_len,
le32_to_cpu(de->inode),
get_dtype(sb, de->file_type)))
goto done;
} else {
int save_len = fname_crypto_str.len;
/* Directory is encrypted */
err = ext4_fname_disk_to_usr(enc_ctx,
err = ext4_fname_disk_to_usr(inode,
NULL, de, &fname_crypto_str);
fname_crypto_str.len = save_len;
if (err < 0)
goto errout;
if (!dir_emit(ctx,
@ -272,7 +268,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
err = 0;
errout:
#ifdef CONFIG_EXT4_FS_ENCRYPTION
ext4_put_fname_crypto_ctx(&enc_ctx);
ext4_fname_crypto_free_buffer(&fname_crypto_str);
#endif
brelse(bh);
@ -598,6 +593,13 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
return 0;
}
/*
 * Open hook for directories.  A directory that is not encrypted always
 * opens successfully; for an encrypted one, any nonzero result from
 * ext4_get_encryption_info() is reported as -EACCES.
 */
static int ext4_dir_open(struct inode * inode, struct file * filp)
{
	int err = 0;

	if (ext4_encrypted_inode(inode) && ext4_get_encryption_info(inode))
		err = -EACCES;

	return err;
}
static int ext4_release_dir(struct inode *inode, struct file *filp)
{
if (filp->private_data)
@ -640,5 +642,6 @@ const struct file_operations ext4_dir_operations = {
.compat_ioctl = ext4_compat_ioctl,
#endif
.fsync = ext4_sync_file,
.open = ext4_dir_open,
.release = ext4_release_dir,
};

View File

@ -69,15 +69,6 @@
#define ext_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
#endif
#define EXT4_ERROR_INODE(inode, fmt, a...) \
ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a)
#define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...) \
ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a)
#define EXT4_ERROR_FILE(file, block, fmt, a...) \
ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a)
/* data type for block offset of block group */
typedef int ext4_grpblk_t;
@ -90,6 +81,11 @@ typedef __u32 ext4_lblk_t;
/* data type for block group number */
typedef unsigned int ext4_group_t;
/*
 * Direction in which the extent-shifting helpers move extents:
 * SHIFT_LEFT subtracts the shift from an extent's starting block,
 * SHIFT_RIGHT adds it.
 */
enum SHIFT_DIRECTION {
	SHIFT_LEFT	= 0,
	SHIFT_RIGHT	= 1,
};
/*
* Flags used in mballoc's allocation_context flags field.
*
@ -911,7 +907,6 @@ struct ext4_inode_info {
/* on-disk additional length */
__u16 i_extra_isize;
char i_crypt_policy_flags;
/* Indicate the inline data space. */
u16 i_inline_off;
@ -955,7 +950,7 @@ struct ext4_inode_info {
#ifdef CONFIG_EXT4_FS_ENCRYPTION
/* Encryption params */
struct ext4_encryption_key i_encryption_key;
struct ext4_crypt_info *i_crypt_info;
#endif
};
@ -1374,12 +1369,6 @@ struct ext4_sb_info {
struct ratelimit_state s_err_ratelimit_state;
struct ratelimit_state s_warning_ratelimit_state;
struct ratelimit_state s_msg_ratelimit_state;
#ifdef CONFIG_EXT4_FS_ENCRYPTION
/* Encryption */
uint32_t s_file_encryption_mode;
uint32_t s_dir_encryption_mode;
#endif
};
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@ -1838,6 +1827,17 @@ struct dx_hash_info
*/
#define HASH_NB_ALWAYS 1
/*
 * Bundles a caller-supplied name with the derived name buffers and the
 * hash info that are carried along while that name is being handled.
 */
struct ext4_filename {
/* the name exactly as passed in by the caller */
const struct qstr *usr_fname;
/* name/length pair read through fname_name() and fname_len() */
struct ext4_str disk_name;
/* hash and minor hash associated with the name */
struct dx_hash_info hinfo;
#ifdef CONFIG_EXT4_FS_ENCRYPTION
/* buffer whose ->name is released by ext4_fname_free_filename() */
struct ext4_str crypto_buf;
#endif
};
#define fname_name(p) ((p)->disk_name.name)
#define fname_len(p) ((p)->disk_name.len)
/*
* Describe an inode's exact location on disk and in memory
@ -2054,6 +2054,7 @@ int ext4_get_policy(struct inode *inode,
struct ext4_encryption_policy *policy);
/* crypto.c */
extern struct kmem_cache *ext4_crypt_info_cachep;
bool ext4_valid_contents_enc_mode(uint32_t mode);
uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size);
extern struct workqueue_struct *ext4_read_workqueue;
@ -2085,57 +2086,84 @@ static inline int ext4_sb_has_crypto(struct super_block *sb)
/* crypto_fname.c */
bool ext4_valid_filenames_enc_mode(uint32_t mode);
u32 ext4_fname_crypto_round_up(u32 size, u32 blksize);
int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx,
unsigned ext4_fname_encrypted_size(struct inode *inode, u32 ilen);
int ext4_fname_crypto_alloc_buffer(struct inode *inode,
u32 ilen, struct ext4_str *crypto_str);
int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
int _ext4_fname_disk_to_usr(struct inode *inode,
struct dx_hash_info *hinfo,
const struct ext4_str *iname,
struct ext4_str *oname);
int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
int ext4_fname_disk_to_usr(struct inode *inode,
struct dx_hash_info *hinfo,
const struct ext4_dir_entry_2 *de,
struct ext4_str *oname);
int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx,
int ext4_fname_usr_to_disk(struct inode *inode,
const struct qstr *iname,
struct ext4_str *oname);
int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx,
const struct qstr *iname,
struct dx_hash_info *hinfo);
int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx,
u32 namelen);
int ext4_fname_match(struct ext4_fname_crypto_ctx *ctx, struct ext4_str *cstr,
int len, const char * const name,
struct ext4_dir_entry_2 *de);
#ifdef CONFIG_EXT4_FS_ENCRYPTION
void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx);
struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(struct inode *inode,
u32 max_len);
void ext4_fname_crypto_free_buffer(struct ext4_str *crypto_str);
int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname,
int lookup, struct ext4_filename *fname);
void ext4_fname_free_filename(struct ext4_filename *fname);
#else
static inline
void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx) { }
static inline
struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(struct inode *inode,
u32 max_len)
int ext4_setup_fname_crypto(struct inode *inode)
{
return NULL;
return 0;
}
static inline void ext4_fname_crypto_free_buffer(struct ext4_str *p) { }
static inline int ext4_fname_setup_filename(struct inode *dir,
const struct qstr *iname,
int lookup, struct ext4_filename *fname)
{
fname->usr_fname = iname;
fname->disk_name.name = (unsigned char *) iname->name;
fname->disk_name.len = iname->len;
return 0;
}
static inline void ext4_fname_free_filename(struct ext4_filename *fname) { }
#endif
/* crypto_key.c */
int ext4_generate_encryption_key(struct inode *inode);
void ext4_free_crypt_info(struct ext4_crypt_info *ci);
void ext4_free_encryption_info(struct inode *inode, struct ext4_crypt_info *ci);
int _ext4_get_encryption_info(struct inode *inode);
#ifdef CONFIG_EXT4_FS_ENCRYPTION
int ext4_has_encryption_key(struct inode *inode);
static inline int ext4_get_encryption_info(struct inode *inode)
{
struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
if (!ci ||
(ci->ci_keyring_key &&
(ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
(1 << KEY_FLAG_REVOKED) |
(1 << KEY_FLAG_DEAD)))))
return _ext4_get_encryption_info(inode);
return 0;
}
static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode)
{
return EXT4_I(inode)->i_crypt_info;
}
#else
/*
 * Stubs used when CONFIG_EXT4_FS_ENCRYPTION is not set.
 */

/* Without encryption support no inode ever has a key: always 0. */
static inline int ext4_has_encryption_key(struct inode *inode)
{
return 0;
}
/* Nothing to set up without encryption support: always reports success (0). */
static inline int ext4_get_encryption_info(struct inode *inode)
{
return 0;
}
/* Without encryption support there is no crypt info: always NULL. */
static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode)
{
return NULL;
}
#endif
@ -2156,14 +2184,13 @@ extern void ext4_htree_free_dir_info(struct dir_private_info *p);
extern int ext4_find_dest_de(struct inode *dir, struct inode *inode,
struct buffer_head *bh,
void *buf, int buf_size,
const char *name, int namelen,
struct ext4_filename *fname,
struct ext4_dir_entry_2 **dest_de);
int ext4_insert_dentry(struct inode *dir,
struct inode *inode,
struct ext4_dir_entry_2 *de,
int buf_size,
const struct qstr *iname,
const char *name, int namelen);
struct inode *inode,
struct ext4_dir_entry_2 *de,
int buf_size,
struct ext4_filename *fname);
static inline void ext4_update_dx_flag(struct inode *inode)
{
if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
@ -2317,13 +2344,14 @@ extern int ext4_orphan_add(handle_t *, struct inode *);
extern int ext4_orphan_del(handle_t *, struct inode *);
extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
__u32 start_minor_hash, __u32 *next_hash);
extern int search_dir(struct buffer_head *bh,
char *search_buf,
int buf_size,
struct inode *dir,
const struct qstr *d_name,
unsigned int offset,
struct ext4_dir_entry_2 **res_dir);
extern int ext4_search_dir(struct buffer_head *bh,
char *search_buf,
int buf_size,
struct inode *dir,
struct ext4_filename *fname,
const struct qstr *d_name,
unsigned int offset,
struct ext4_dir_entry_2 **res_dir);
extern int ext4_generic_delete_entry(handle_t *handle,
struct inode *dir,
struct ext4_dir_entry_2 *de_del,
@ -2368,6 +2396,9 @@ void __ext4_abort(struct super_block *, const char *, unsigned int,
extern __printf(4, 5)
void __ext4_warning(struct super_block *, const char *, unsigned int,
const char *, ...);
extern __printf(4, 5)
void __ext4_warning_inode(const struct inode *inode, const char *function,
unsigned int line, const char *fmt, ...);
extern __printf(3, 4)
void __ext4_msg(struct super_block *, const char *, const char *, ...);
extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp,
@ -2378,6 +2409,15 @@ void __ext4_grp_locked_error(const char *, unsigned int,
unsigned long, ext4_fsblk_t,
const char *, ...);
#define EXT4_ERROR_INODE(inode, fmt, a...) \
ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a)
#define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...) \
ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a)
#define EXT4_ERROR_FILE(file, block, fmt, a...) \
ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a)
#ifdef CONFIG_PRINTK
#define ext4_error_inode(inode, func, line, block, fmt, ...) \
@ -2390,6 +2430,8 @@ void __ext4_grp_locked_error(const char *, unsigned int,
__ext4_abort(sb, __func__, __LINE__, fmt, ##__VA_ARGS__)
#define ext4_warning(sb, fmt, ...) \
__ext4_warning(sb, __func__, __LINE__, fmt, ##__VA_ARGS__)
#define ext4_warning_inode(inode, fmt, ...) \
__ext4_warning_inode(inode, __func__, __LINE__, fmt, ##__VA_ARGS__)
#define ext4_msg(sb, level, fmt, ...) \
__ext4_msg(sb, level, fmt, ##__VA_ARGS__)
#define dump_mmp_msg(sb, mmp, msg) \
@ -2425,6 +2467,11 @@ do { \
no_printk(fmt, ##__VA_ARGS__); \
__ext4_warning(sb, "", 0, " "); \
} while (0)
#define ext4_warning_inode(inode, fmt, ...) \
do { \
no_printk(fmt, ##__VA_ARGS__); \
__ext4_warning_inode(inode, "", 0, " "); \
} while (0)
#define ext4_msg(sb, level, fmt, ...) \
do { \
no_printk(fmt, ##__VA_ARGS__); \
@ -2768,7 +2815,9 @@ extern int ext4_da_write_inline_data_begin(struct address_space *mapping,
extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
unsigned len, unsigned copied,
struct page *page);
extern int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
extern int ext4_try_add_inline_entry(handle_t *handle,
struct ext4_filename *fname,
struct dentry *dentry,
struct inode *inode);
extern int ext4_try_create_inline_dir(handle_t *handle,
struct inode *parent,
@ -2782,6 +2831,7 @@ extern int htree_inlinedir_to_tree(struct file *dir_file,
__u32 start_hash, __u32 start_minor_hash,
int *has_inline_data);
extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
struct ext4_filename *fname,
const struct qstr *d_name,
struct ext4_dir_entry_2 **res_dir,
int *has_inline_data);
@ -2913,6 +2963,7 @@ extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
extern int ext4_ext_precache(struct inode *inode);
extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
extern int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len);
extern int ext4_swap_extents(handle_t *handle, struct inode *inode1,
struct inode *inode2, ext4_lblk_t lblk1,
ext4_lblk_t lblk2, ext4_lblk_t count,

View File

@ -66,24 +66,39 @@ struct ext4_encryption_context {
#define EXT4_KEY_DESC_PREFIX "ext4:"
#define EXT4_KEY_DESC_PREFIX_SIZE 5
/* This is passed in from userspace into the kernel keyring */
struct ext4_encryption_key {
uint32_t mode;
char raw[EXT4_MAX_KEY_SIZE];
uint32_t size;
__u32 mode;
char raw[EXT4_MAX_KEY_SIZE];
__u32 size;
} __attribute__((__packed__));
/*
 * Per-inode encryption state, allocated from ext4_crypt_info_cachep and
 * released with ext4_free_crypt_info().
 */
struct ext4_crypt_info {
/* copied from the encryption context's contents_encryption_mode */
char ci_data_mode;
/* copied from the encryption context's filenames_encryption_mode */
char ci_filename_mode;
/* copied from the encryption context's flags */
char ci_flags;
/* cipher transform; freed with crypto_free_ablkcipher() */
struct crypto_ablkcipher *ci_ctfm;
/* keyring key reference, or NULL; dropped with key_put() on free */
struct key *ci_keyring_key;
/* copied from the encryption context's master_key_descriptor */
char ci_master_key[EXT4_KEY_DESCRIPTOR_SIZE];
};
#define EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001
#define EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL 0x00000002
#define EXT4_WRITE_PATH_FL 0x00000002
struct ext4_crypto_ctx {
struct crypto_tfm *tfm; /* Crypto API context */
struct page *bounce_page; /* Ciphertext page on write path */
struct page *control_page; /* Original page on write path */
struct bio *bio; /* The bio for this context */
struct work_struct work; /* Work queue for read complete path */
struct list_head free_list; /* Free list */
int flags; /* Flags */
int mode; /* Encryption mode for tfm */
union {
struct {
struct page *bounce_page; /* Ciphertext page */
struct page *control_page; /* Original page */
} w;
struct {
struct bio *bio;
struct work_struct work;
} r;
struct list_head free_list; /* Free list */
};
char flags; /* Flags */
char mode; /* Encryption mode for tfm */
};
struct ext4_completion_result {
@ -121,18 +136,6 @@ struct ext4_str {
u32 len;
};
struct ext4_fname_crypto_ctx {
u32 lim;
char tmp_buf[EXT4_CRYPTO_BLOCK_SIZE];
struct crypto_ablkcipher *ctfm;
struct crypto_hash *htfm;
struct page *workpage;
struct ext4_encryption_key key;
unsigned flags : 8;
unsigned has_valid_key : 1;
unsigned ctfm_key_is_ready : 1;
};
/**
* For encrypted symlinks, the ciphertext length is stored at the beginning
* of the string in little-endian format.

View File

@ -4456,6 +4456,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
ar.flags |= EXT4_MB_HINT_NOPREALLOC;
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
ar.flags |= EXT4_MB_DELALLOC_RESERVED;
if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
ar.flags |= EXT4_MB_USE_RESERVED;
newblock = ext4_mb_new_blocks(handle, &ar, &err);
if (!newblock)
goto out2;
@ -4663,6 +4665,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
int ret = 0;
int ret2 = 0;
int retries = 0;
int depth = 0;
struct ext4_map_blocks map;
unsigned int credits;
loff_t epos;
@ -4677,13 +4680,32 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
if (len <= EXT_UNWRITTEN_MAX_LEN)
flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
/* Wait all existing dio workers, newcomers will block on i_mutex */
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
/*
* credits to insert 1 extent into extent tree
*/
credits = ext4_chunk_trans_blocks(inode, len);
/*
* We can only call ext_depth() on extent based inodes
*/
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
depth = ext_depth(inode);
else
depth = -1;
retry:
while (ret >= 0 && len) {
/*
* Recalculate credits when extent tree depth changes.
*/
if (depth >= 0 && depth != ext_depth(inode)) {
credits = ext4_chunk_trans_blocks(inode, len);
depth = ext_depth(inode);
}
handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
credits);
if (IS_ERR(handle)) {
@ -4725,6 +4747,8 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
goto retry;
}
ext4_inode_resume_unlocked_dio(inode);
return ret > 0 ? ret2 : ret;
}
@ -4912,12 +4936,14 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
* bug we should fix....
*/
if (ext4_encrypted_inode(inode) &&
(mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)))
(mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE |
FALLOC_FL_ZERO_RANGE)))
return -EOPNOTSUPP;
/* Return error if mode is not supported */
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
FALLOC_FL_INSERT_RANGE))
return -EOPNOTSUPP;
if (mode & FALLOC_FL_PUNCH_HOLE)
@ -4930,6 +4956,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (mode & FALLOC_FL_COLLAPSE_RANGE)
return ext4_collapse_range(inode, offset, len);
if (mode & FALLOC_FL_INSERT_RANGE)
return ext4_insert_range(inode, offset, len);
if (mode & FALLOC_FL_ZERO_RANGE)
return ext4_zero_range(file, offset, len, mode);
@ -5224,13 +5253,13 @@ ext4_access_path(handle_t *handle, struct inode *inode,
/*
* ext4_ext_shift_path_extents:
* Shift the extents of a path structure lying between path[depth].p_ext
* and EXT_LAST_EXTENT(path[depth].p_hdr) downwards, by subtracting shift
* from starting block for each extent.
* and EXT_LAST_EXTENT(path[depth].p_hdr), by @shift blocks. @SHIFT tells
* if it is right shift or left shift operation.
*/
static int
ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
struct inode *inode, handle_t *handle,
ext4_lblk_t *start)
enum SHIFT_DIRECTION SHIFT)
{
int depth, err = 0;
struct ext4_extent *ex_start, *ex_last;
@ -5252,19 +5281,25 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
update = 1;
*start = le32_to_cpu(ex_last->ee_block) +
ext4_ext_get_actual_len(ex_last);
while (ex_start <= ex_last) {
le32_add_cpu(&ex_start->ee_block, -shift);
/* Try to merge to the left. */
if ((ex_start >
EXT_FIRST_EXTENT(path[depth].p_hdr)) &&
ext4_ext_try_to_merge_right(inode,
path, ex_start - 1))
if (SHIFT == SHIFT_LEFT) {
le32_add_cpu(&ex_start->ee_block,
-shift);
/* Try to merge to the left. */
if ((ex_start >
EXT_FIRST_EXTENT(path[depth].p_hdr))
&&
ext4_ext_try_to_merge_right(inode,
path, ex_start - 1))
ex_last--;
else
ex_start++;
} else {
le32_add_cpu(&ex_last->ee_block, shift);
ext4_ext_try_to_merge_right(inode, path,
ex_last);
ex_last--;
else
ex_start++;
}
}
err = ext4_ext_dirty(handle, inode, path + depth);
if (err)
@ -5279,7 +5314,10 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
if (err)
goto out;
le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
if (SHIFT == SHIFT_LEFT)
le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
else
le32_add_cpu(&path[depth].p_idx->ei_block, shift);
err = ext4_ext_dirty(handle, inode, path + depth);
if (err)
goto out;
@ -5297,19 +5335,20 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
/*
* ext4_ext_shift_extents:
* All the extents which lies in the range from start to the last allocated
* block for the file are shifted downwards by shift blocks.
* All the extents which lie in the range from @start to the last allocated
* block for the @inode are shifted either towards left or right (depending
* upon @SHIFT) by @shift blocks.
* On success, 0 is returned, error otherwise.
*/
static int
ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
ext4_lblk_t start, ext4_lblk_t shift)
ext4_lblk_t start, ext4_lblk_t shift,
enum SHIFT_DIRECTION SHIFT)
{
struct ext4_ext_path *path;
int ret = 0, depth;
struct ext4_extent *extent;
ext4_lblk_t stop_block;
ext4_lblk_t ex_start, ex_end;
ext4_lblk_t stop, *iterator, ex_start, ex_end;
/* Let path point to the last extent */
path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
@ -5321,58 +5360,84 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
if (!extent)
goto out;
stop_block = le32_to_cpu(extent->ee_block) +
stop = le32_to_cpu(extent->ee_block) +
ext4_ext_get_actual_len(extent);
/* Nothing to shift, if hole is at the end of file */
if (start >= stop_block)
goto out;
/*
* In case of left shift, Don't start shifting extents until we make
* sure the hole is big enough to accommodate the shift.
*/
if (SHIFT == SHIFT_LEFT) {
path = ext4_find_extent(inode, start - 1, &path, 0);
if (IS_ERR(path))
return PTR_ERR(path);
depth = path->p_depth;
extent = path[depth].p_ext;
if (extent) {
ex_start = le32_to_cpu(extent->ee_block);
ex_end = le32_to_cpu(extent->ee_block) +
ext4_ext_get_actual_len(extent);
} else {
ex_start = 0;
ex_end = 0;
}
/*
* Don't start shifting extents until we make sure the hole is big
* enough to accomodate the shift.
*/
path = ext4_find_extent(inode, start - 1, &path, 0);
if (IS_ERR(path))
return PTR_ERR(path);
depth = path->p_depth;
extent = path[depth].p_ext;
if (extent) {
ex_start = le32_to_cpu(extent->ee_block);
ex_end = le32_to_cpu(extent->ee_block) +
ext4_ext_get_actual_len(extent);
} else {
ex_start = 0;
ex_end = 0;
if ((start == ex_start && shift > ex_start) ||
(shift > start - ex_end)) {
ext4_ext_drop_refs(path);
kfree(path);
return -EINVAL;
}
}
if ((start == ex_start && shift > ex_start) ||
(shift > start - ex_end))
return -EINVAL;
/*
* In case of left shift, iterator points to start and it is increased
* till we reach stop. In case of right shift, iterator points to stop
* and it is decreased till we reach start.
*/
if (SHIFT == SHIFT_LEFT)
iterator = &start;
else
iterator = &stop;
/* It's safe to start updating extents */
while (start < stop_block) {
path = ext4_find_extent(inode, start, &path, 0);
while (start < stop) {
path = ext4_find_extent(inode, *iterator, &path, 0);
if (IS_ERR(path))
return PTR_ERR(path);
depth = path->p_depth;
extent = path[depth].p_ext;
if (!extent) {
EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
(unsigned long) start);
(unsigned long) *iterator);
return -EIO;
}
if (start > le32_to_cpu(extent->ee_block)) {
if (SHIFT == SHIFT_LEFT && *iterator >
le32_to_cpu(extent->ee_block)) {
/* Hole, move to the next extent */
if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) {
path[depth].p_ext++;
} else {
start = ext4_ext_next_allocated_block(path);
*iterator = ext4_ext_next_allocated_block(path);
continue;
}
}
if (SHIFT == SHIFT_LEFT) {
extent = EXT_LAST_EXTENT(path[depth].p_hdr);
*iterator = le32_to_cpu(extent->ee_block) +
ext4_ext_get_actual_len(extent);
} else {
extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
*iterator = le32_to_cpu(extent->ee_block) > 0 ?
le32_to_cpu(extent->ee_block) - 1 : 0;
/* Update path extent in case we need to stop */
while (le32_to_cpu(extent->ee_block) < start)
extent++;
path[depth].p_ext = extent;
}
ret = ext4_ext_shift_path_extents(path, shift, inode,
handle, &start);
handle, SHIFT);
if (ret)
break;
}
@ -5485,7 +5550,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
ext4_discard_preallocations(inode);
ret = ext4_ext_shift_extents(inode, handle, punch_stop,
punch_stop - punch_start);
punch_stop - punch_start, SHIFT_LEFT);
if (ret) {
up_write(&EXT4_I(inode)->i_data_sem);
goto out_stop;
@ -5510,6 +5575,174 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
return ret;
}
/*
 * ext4_insert_range:
 * This function implements the FALLOC_FL_INSERT_RANGE flag of fallocate.
 * The data blocks starting from @offset to the EOF are shifted by @len
 * towards right to create a hole in the @inode. Inode size is increased
 * by len bytes.
 *
 * @offset and @len must both be multiples of the cluster size, @offset must
 * lie below the current i_size, and i_size + @len must not exceed
 * s_maxbytes.  Only extent-mapped regular files are supported.
 *
 * Returns 0 on success, error otherwise.
 */
int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
{
	struct super_block *sb = inode->i_sb;
	handle_t *handle;
	struct ext4_ext_path *path;
	struct ext4_extent *extent;
	ext4_lblk_t offset_lblk, len_lblk, ee_start_lblk = 0;
	unsigned int credits, ee_len;
	int ret = 0, depth, split_flag = 0;
	loff_t ioffset;

	/*
	 * We need to test this early because xfstests assumes that an
	 * insert range of (0, 1) will return EOPNOTSUPP if the file
	 * system does not support insert range.
	 */
	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
		return -EOPNOTSUPP;

	/* Insert range works only on cluster size aligned offsets. */
	if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) ||
	    len & (EXT4_CLUSTER_SIZE(sb) - 1))
		return -EINVAL;

	if (!S_ISREG(inode->i_mode))
		return -EOPNOTSUPP;

	trace_ext4_insert_range(inode, offset, len);

	offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb);
	len_lblk = len >> EXT4_BLOCK_SIZE_BITS(sb);

	/* Call ext4_force_commit to flush all data in case of data=journal */
	if (ext4_should_journal_data(inode)) {
		ret = ext4_force_commit(inode->i_sb);
		if (ret)
			return ret;
	}

	/*
	 * Need to round down to align start offset to page size boundary
	 * for page size > block size.
	 */
	ioffset = round_down(offset, PAGE_SIZE);

	/* Write out all dirty pages */
	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
					   LLONG_MAX);
	if (ret)
		return ret;

	/* Take mutex lock */
	mutex_lock(&inode->i_mutex);

	/* Currently just for extent based files (re-checked under i_mutex) */
	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
		ret = -EOPNOTSUPP;
		goto out_mutex;
	}

	/* The grown file must still fit within the maximum file size */
	if (inode->i_size + len > inode->i_sb->s_maxbytes) {
		ret = -EFBIG;
		goto out_mutex;
	}

	/* Offset should be less than i_size */
	if (offset >= i_size_read(inode)) {
		ret = -EINVAL;
		goto out_mutex;
	}

	truncate_pagecache(inode, ioffset);

	/* Wait for existing dio to complete */
	ext4_inode_block_unlocked_dio(inode);
	inode_dio_wait(inode);

	credits = ext4_writepage_trans_blocks(inode);
	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out_dio;
	}

	/* Expand file to avoid data loss if there is error while shifting */
	inode->i_size += len;
	EXT4_I(inode)->i_disksize += len;
	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
	ret = ext4_mark_inode_dirty(handle, inode);
	if (ret)
		goto out_stop;

	down_write(&EXT4_I(inode)->i_data_sem);
	ext4_discard_preallocations(inode);

	path = ext4_find_extent(inode, offset_lblk, NULL, 0);
	if (IS_ERR(path)) {
		/*
		 * Propagate the lookup failure; without this the caller
		 * would see the stale 0 left in ret by the previous call.
		 */
		ret = PTR_ERR(path);
		up_write(&EXT4_I(inode)->i_data_sem);
		goto out_stop;
	}

	depth = ext_depth(inode);
	extent = path[depth].p_ext;
	if (extent) {
		ee_start_lblk = le32_to_cpu(extent->ee_block);
		ee_len = ext4_ext_get_actual_len(extent);

		/*
		 * If offset_lblk is not the starting block of extent, split
		 * the extent @offset_lblk
		 */
		if ((offset_lblk > ee_start_lblk) &&
		    (offset_lblk < (ee_start_lblk + ee_len))) {
			if (ext4_ext_is_unwritten(extent))
				split_flag = EXT4_EXT_MARK_UNWRIT1 |
					     EXT4_EXT_MARK_UNWRIT2;
			ret = ext4_split_extent_at(handle, inode, &path,
					offset_lblk, split_flag,
					EXT4_EX_NOCACHE |
					EXT4_GET_BLOCKS_PRE_IO |
					EXT4_GET_BLOCKS_METADATA_NOFAIL);
		}
	}

	/*
	 * The path is not needed past this point.  Release it whether or
	 * not the lookup found an extent, so it is not leaked when
	 * path[depth].p_ext is NULL.
	 */
	ext4_ext_drop_refs(path);
	kfree(path);
	if (ret < 0) {
		up_write(&EXT4_I(inode)->i_data_sem);
		goto out_stop;
	}

	ret = ext4_es_remove_extent(inode, offset_lblk,
				    EXT_MAX_BLOCKS - offset_lblk);
	if (ret) {
		up_write(&EXT4_I(inode)->i_data_sem);
		goto out_stop;
	}

	/*
	 * if offset_lblk lies in a hole which is at start of file, use
	 * ee_start_lblk to shift extents
	 */
	ret = ext4_ext_shift_extents(inode, handle,
		ee_start_lblk > offset_lblk ? ee_start_lblk : offset_lblk,
		len_lblk, SHIFT_RIGHT);

	up_write(&EXT4_I(inode)->i_data_sem);
	if (IS_SYNC(inode))
		ext4_handle_sync(handle);

out_stop:
	ext4_journal_stop(handle);
out_dio:
	ext4_inode_resume_unlocked_dio(inode);
out_mutex:
	mutex_unlock(&inode->i_mutex);
	return ret;
}
/**
* ext4_swap_extents - Swap extents between two inodes
*
@ -5542,7 +5775,7 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
BUG_ON(!mutex_is_locked(&inode1->i_mutex));
BUG_ON(!mutex_is_locked(&inode1->i_mutex));
BUG_ON(!mutex_is_locked(&inode2->i_mutex));
*erp = ext4_es_remove_extent(inode1, lblk1, count);
if (unlikely(*erp))

View File

@ -223,9 +223,11 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
struct inode *inode = file->f_mapping->host;
if (ext4_encrypted_inode(inode)) {
int err = ext4_generate_encryption_key(inode);
int err = ext4_get_encryption_info(inode);
if (err)
return 0;
if (ext4_encryption_info(inode) == NULL)
return -ENOKEY;
}
file_accessed(file);
if (IS_DAX(file_inode(file))) {
@ -278,6 +280,13 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
ext4_journal_stop(handle);
}
}
if (ext4_encrypted_inode(inode)) {
ret = ext4_get_encryption_info(inode);
if (ret)
return -EACCES;
if (ext4_encryption_info(inode) == NULL)
return -ENOKEY;
}
/*
* Set up the jbd2_inode if we are opening the inode for
* writing and the journal is present
@ -287,13 +296,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
if (ret < 0)
return ret;
}
ret = dquot_file_open(inode, filp);
if (!ret && ext4_encrypted_inode(inode)) {
ret = ext4_generate_encryption_key(inode);
if (ret)
ret = -EACCES;
}
return ret;
return dquot_file_open(inode, filp);
}
/*

View File

@ -726,11 +726,25 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
ext4_group_t i;
ext4_group_t flex_group;
struct ext4_group_info *grp;
int encrypt = 0;
/* Cannot create files in a deleted directory */
if (!dir || !dir->i_nlink)
return ERR_PTR(-EPERM);
if ((ext4_encrypted_inode(dir) ||
DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) &&
(S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
err = ext4_get_encryption_info(dir);
if (err)
return ERR_PTR(err);
if (ext4_encryption_info(dir) == NULL)
return ERR_PTR(-EPERM);
if (!handle)
nblocks += EXT4_DATA_TRANS_BLOCKS(dir->i_sb);
encrypt = 1;
}
sb = dir->i_sb;
ngroups = ext4_get_groups_count(sb);
trace_ext4_request_inode(dir, mode);
@ -996,12 +1010,6 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
ei->i_block_group = group;
ei->i_last_alloc_group = ~0;
/* If the directory encrypted, then we should encrypt the inode. */
if ((S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) &&
(ext4_encrypted_inode(dir) ||
DUMMY_ENCRYPTION_ENABLED(sbi)))
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
ext4_set_inode_flags(inode);
if (IS_DIRSYNC(inode))
ext4_handle_sync(handle);
@ -1034,28 +1042,9 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
ext4_set_inode_state(inode, EXT4_STATE_NEW);
ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
#ifdef CONFIG_EXT4_FS_ENCRYPTION
if ((sbi->s_file_encryption_mode == EXT4_ENCRYPTION_MODE_INVALID) &&
(sbi->s_dir_encryption_mode == EXT4_ENCRYPTION_MODE_INVALID)) {
ei->i_inline_off = 0;
if (EXT4_HAS_INCOMPAT_FEATURE(sb,
EXT4_FEATURE_INCOMPAT_INLINE_DATA))
ext4_set_inode_state(inode,
EXT4_STATE_MAY_INLINE_DATA);
} else {
/* Inline data and encryption are incompatible
* We turn off inline data since encryption is enabled */
ei->i_inline_off = 1;
if (EXT4_HAS_INCOMPAT_FEATURE(sb,
EXT4_FEATURE_INCOMPAT_INLINE_DATA))
ext4_clear_inode_state(inode,
EXT4_STATE_MAY_INLINE_DATA);
}
#else
ei->i_inline_off = 0;
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_INLINE_DATA))
ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
#endif
ret = inode;
err = dquot_alloc_inode(inode);
if (err)
@ -1082,6 +1071,12 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
ei->i_datasync_tid = handle->h_transaction->t_tid;
}
if (encrypt) {
err = ext4_inherit_context(dir, inode);
if (err)
goto fail_free_drop;
}
err = ext4_mark_inode_dirty(handle, inode);
if (err) {
ext4_std_error(sb, err);

View File

@ -565,7 +565,7 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
EXT4_ERROR_INODE(inode, "Can't allocate blocks for "
"non-extent mapped inodes with bigalloc");
return -ENOSPC;
return -EUCLEAN;
}
/* Set up for the direct block allocation */
@ -576,6 +576,8 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
ar.flags = EXT4_MB_HINT_DATA;
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
ar.flags |= EXT4_MB_DELALLOC_RESERVED;
if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
ar.flags |= EXT4_MB_USE_RESERVED;
ar.goal = ext4_find_goal(inode, map->m_lblk, partial);

View File

@ -995,20 +995,18 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
* and -EEXIST if directory entry already exists.
*/
static int ext4_add_dirent_to_inline(handle_t *handle,
struct ext4_filename *fname,
struct dentry *dentry,
struct inode *inode,
struct ext4_iloc *iloc,
void *inline_start, int inline_size)
{
struct inode *dir = d_inode(dentry->d_parent);
const char *name = dentry->d_name.name;
int namelen = dentry->d_name.len;
int err;
struct ext4_dir_entry_2 *de;
err = ext4_find_dest_de(dir, inode, iloc->bh,
inline_start, inline_size,
name, namelen, &de);
err = ext4_find_dest_de(dir, inode, iloc->bh, inline_start,
inline_size, fname, &de);
if (err)
return err;
@ -1016,8 +1014,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
err = ext4_journal_get_write_access(handle, iloc->bh);
if (err)
return err;
ext4_insert_dentry(dir, inode, de, inline_size, &dentry->d_name,
name, namelen);
ext4_insert_dentry(dir, inode, de, inline_size, fname);
ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size);
@ -1248,8 +1245,8 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
* If succeeds, return 0. If not, extended the inline dir and copied data to
* the new created block.
*/
int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
struct inode *inode)
int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
struct dentry *dentry, struct inode *inode)
{
int ret, inline_size;
void *inline_start;
@ -1268,7 +1265,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
EXT4_INLINE_DOTDOT_SIZE;
inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc,
ret = ext4_add_dirent_to_inline(handle, fname, dentry, inode, &iloc,
inline_start, inline_size);
if (ret != -ENOSPC)
goto out;
@ -1289,8 +1286,9 @@ int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
if (inline_size) {
inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc,
inline_start, inline_size);
ret = ext4_add_dirent_to_inline(handle, fname, dentry,
inode, &iloc, inline_start,
inline_size);
if (ret != -ENOSPC)
goto out;
@ -1611,6 +1609,7 @@ int ext4_try_create_inline_dir(handle_t *handle, struct inode *parent,
}
struct buffer_head *ext4_find_inline_entry(struct inode *dir,
struct ext4_filename *fname,
const struct qstr *d_name,
struct ext4_dir_entry_2 **res_dir,
int *has_inline_data)
@ -1632,8 +1631,8 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir,
inline_start = (void *)ext4_raw_inode(&iloc)->i_block +
EXT4_INLINE_DOTDOT_SIZE;
inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
ret = search_dir(iloc.bh, inline_start, inline_size,
dir, d_name, 0, res_dir);
ret = ext4_search_dir(iloc.bh, inline_start, inline_size,
dir, fname, d_name, 0, res_dir);
if (ret == 1)
goto out_find;
if (ret < 0)
@ -1645,8 +1644,8 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir,
inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE;
ret = search_dir(iloc.bh, inline_start, inline_size,
dir, d_name, 0, res_dir);
ret = ext4_search_dir(iloc.bh, inline_start, inline_size,
dir, fname, d_name, 0, res_dir);
if (ret == 1)
goto out_find;

View File

@ -731,18 +731,18 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
* `handle' can be NULL if create is zero
*/
struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
ext4_lblk_t block, int create)
ext4_lblk_t block, int map_flags)
{
struct ext4_map_blocks map;
struct buffer_head *bh;
int create = map_flags & EXT4_GET_BLOCKS_CREATE;
int err;
J_ASSERT(handle != NULL || create == 0);
map.m_lblk = block;
map.m_len = 1;
err = ext4_map_blocks(handle, inode, &map,
create ? EXT4_GET_BLOCKS_CREATE : 0);
err = ext4_map_blocks(handle, inode, &map, map_flags);
if (err == 0)
return create ? ERR_PTR(-ENOSPC) : NULL;
@ -788,11 +788,11 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
}
struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
ext4_lblk_t block, int create)
ext4_lblk_t block, int map_flags)
{
struct buffer_head *bh;
bh = ext4_getblk(handle, inode, block, create);
bh = ext4_getblk(handle, inode, block, map_flags);
if (IS_ERR(bh))
return bh;
if (!bh || buffer_uptodate(bh))
@ -1261,13 +1261,12 @@ static int ext4_journalled_write_end(struct file *file,
}
/*
* Reserve a single cluster located at lblock
* Reserve space for a single cluster
*/
static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
static int ext4_da_reserve_space(struct inode *inode)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int md_needed;
int ret;
/*
@ -1279,25 +1278,14 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
if (ret)
return ret;
/*
* recalculate the amount of metadata blocks to reserve
* in order to allocate nrblocks
* worse case is one extent per block
*/
spin_lock(&ei->i_block_reservation_lock);
/*
* ext4_calc_metadata_amount() has side effects, which we have
* to be prepared undo if we fail to claim space.
*/
md_needed = 0;
trace_ext4_da_reserve_space(inode, 0);
if (ext4_claim_free_clusters(sbi, 1, 0)) {
spin_unlock(&ei->i_block_reservation_lock);
dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
return -ENOSPC;
}
ei->i_reserved_data_blocks++;
trace_ext4_da_reserve_space(inode);
spin_unlock(&ei->i_block_reservation_lock);
return 0; /* success */
@ -1566,9 +1554,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
* then we don't need to reserve it again. However we still need
* to reserve metadata for every block we're going to write.
*/
if (EXT4_SB(inode->i_sb)->s_cluster_ratio <= 1 ||
if (EXT4_SB(inode->i_sb)->s_cluster_ratio == 1 ||
!ext4_find_delalloc_cluster(inode, map->m_lblk)) {
ret = ext4_da_reserve_space(inode, iblock);
ret = ext4_da_reserve_space(inode);
if (ret) {
/* not enough space to reserve */
retval = ret;
@ -1701,19 +1689,32 @@ static int __ext4_journalled_writepage(struct page *page,
ext4_walk_page_buffers(handle, page_bufs, 0, len,
NULL, bget_one);
}
/* As soon as we unlock the page, it can go away, but we have
* references to buffers so we are safe */
/*
* We need to release the page lock before we start the
* journal, so grab a reference so the page won't disappear
* out from under us.
*/
get_page(page);
unlock_page(page);
handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
ext4_writepage_trans_blocks(inode));
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
put_page(page);
goto out_no_pagelock;
}
BUG_ON(!ext4_handle_valid(handle));
lock_page(page);
put_page(page);
if (page->mapping != mapping) {
/* The page got truncated from under us */
ext4_journal_stop(handle);
ret = 0;
goto out;
}
BUG_ON(!ext4_handle_valid(handle));
if (inline_data) {
BUFFER_TRACE(inode_bh, "get write access");
ret = ext4_journal_get_write_access(handle, inode_bh);
@ -1739,6 +1740,8 @@ static int __ext4_journalled_writepage(struct page *page,
NULL, bput_one);
ext4_set_inode_state(inode, EXT4_STATE_JDATA);
out:
unlock_page(page);
out_no_pagelock:
brelse(inode_bh);
return ret;
}
@ -4681,8 +4684,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
ext4_journal_stop(handle);
}
if (attr->ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
if (attr->ia_valid & ATTR_SIZE) {
handle_t *handle;
loff_t oldsize = inode->i_size;
int shrink = (attr->ia_size <= inode->i_size);
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@ -4690,24 +4695,26 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (attr->ia_size > sbi->s_bitmap_maxbytes)
return -EFBIG;
}
if (!S_ISREG(inode->i_mode))
return -EINVAL;
if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size)
inode_inc_iversion(inode);
if (S_ISREG(inode->i_mode) &&
if (ext4_should_order_data(inode) &&
(attr->ia_size < inode->i_size)) {
if (ext4_should_order_data(inode)) {
error = ext4_begin_ordered_truncate(inode,
error = ext4_begin_ordered_truncate(inode,
attr->ia_size);
if (error)
goto err_out;
}
if (error)
goto err_out;
}
if (attr->ia_size != inode->i_size) {
handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
goto err_out;
}
if (ext4_handle_valid(handle)) {
if (ext4_handle_valid(handle) && shrink) {
error = ext4_orphan_add(handle, inode);
orphan = 1;
}
@ -4726,15 +4733,13 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
up_write(&EXT4_I(inode)->i_data_sem);
ext4_journal_stop(handle);
if (error) {
ext4_orphan_del(NULL, inode);
if (orphan)
ext4_orphan_del(NULL, inode);
goto err_out;
}
} else {
loff_t oldsize = inode->i_size;
i_size_write(inode, attr->ia_size);
pagecache_isize_extended(inode, oldsize, inode->i_size);
}
if (!shrink)
pagecache_isize_extended(inode, oldsize, inode->i_size);
/*
* Blocks are going to be removed from the inode. Wait
@ -4754,13 +4759,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
* in data=journal mode to make pages freeable.
*/
truncate_pagecache(inode, inode->i_size);
if (shrink)
ext4_truncate(inode);
}
/*
* We want to call ext4_truncate() even if attr->ia_size ==
* inode->i_size for cases like truncation of fallocated space
*/
if (attr->ia_valid & ATTR_SIZE)
ext4_truncate(inode);
if (!rc) {
setattr_copy(inode, attr);

View File

@ -31,14 +31,11 @@
static void memswap(void *a, void *b, size_t len)
{
unsigned char *ap, *bp;
unsigned char tmp;
ap = (unsigned char *)a;
bp = (unsigned char *)b;
while (len-- > 0) {
tmp = *ap;
*ap = *bp;
*bp = tmp;
swap(*ap, *bp);
ap++;
bp++;
}
@ -675,8 +672,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (err)
return err;
}
if (copy_to_user((void *) arg, sbi->s_es->s_encrypt_pw_salt,
16))
if (copy_to_user((void __user *) arg,
sbi->s_es->s_encrypt_pw_salt, 16))
return -EFAULT;
return 0;
}
@ -690,7 +687,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
err = ext4_get_policy(inode, &policy);
if (err)
return err;
if (copy_to_user((void *)arg, &policy, sizeof(policy)))
if (copy_to_user((void __user *)arg, &policy, sizeof(policy)))
return -EFAULT;
return 0;
#else

View File

@ -882,10 +882,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
/* wait for I/O completion */
for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i])) {
if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i]))
err = -EIO;
goto out;
}
}
first_block = page->index * blocks_per_page;
@ -898,6 +896,11 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
/* skip initialized uptodate buddy */
continue;
if (!buffer_verified(bh[group - first_group]))
/* Skip faulty bitmaps */
continue;
err = 0;
/*
* data carry information regarding this
* particular group in the format specified
@ -2008,7 +2011,12 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
}
}
/* This is now called BEFORE we load the buddy bitmap. */
/*
* This is now called BEFORE we load the buddy bitmap.
* Returns either 1 or 0 indicating that the group is either suitable
* for the allocation or not. In addition it can also return negative
* error code when something goes wrong.
*/
static int ext4_mb_good_group(struct ext4_allocation_context *ac,
ext4_group_t group, int cr)
{
@ -2031,7 +2039,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
int ret = ext4_mb_init_group(ac->ac_sb, group);
if (ret)
return 0;
return ret;
}
fragments = grp->bb_fragments;
@ -2078,7 +2086,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
ext4_group_t ngroups, group, i;
int cr;
int err = 0;
int err = 0, first_err = 0;
struct ext4_sb_info *sbi;
struct super_block *sb;
struct ext4_buddy e4b;
@ -2145,6 +2153,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
group = ac->ac_g_ex.fe_group;
for (i = 0; i < ngroups; group++, i++) {
int ret = 0;
cond_resched();
/*
* Artificially restricted ngroups for non-extent
@ -2154,8 +2163,12 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
group = 0;
/* This now checks without needing the buddy page */
if (!ext4_mb_good_group(ac, group, cr))
ret = ext4_mb_good_group(ac, group, cr);
if (ret <= 0) {
if (!first_err)
first_err = ret;
continue;
}
err = ext4_mb_load_buddy(sb, group, &e4b);
if (err)
@ -2167,9 +2180,12 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
* We need to check again after locking the
* block group
*/
if (!ext4_mb_good_group(ac, group, cr)) {
ret = ext4_mb_good_group(ac, group, cr);
if (ret <= 0) {
ext4_unlock_group(sb, group);
ext4_mb_unload_buddy(&e4b);
if (!first_err)
first_err = ret;
continue;
}
@ -2216,6 +2232,8 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
}
}
out:
if (!err && ac->ac_status != AC_STATUS_FOUND && first_err)
err = first_err;
return err;
}
@ -2257,12 +2275,9 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
group--;
if (group == 0)
seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
"[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
"%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
"group", "free", "frags", "first",
"2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
"2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");
seq_puts(seq, "#group: free frags first ["
" 2^0 2^1 2^2 2^3 2^4 2^5 2^6 "
" 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]");
i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
sizeof(struct ext4_group_info);

View File

@ -166,12 +166,9 @@ mext_page_double_lock(struct inode *inode1, struct inode *inode2,
*/
wait_on_page_writeback(page[0]);
wait_on_page_writeback(page[1]);
if (inode1 > inode2) {
struct page *tmp;
tmp = page[0];
page[0] = page[1];
page[1] = tmp;
}
if (inode1 > inode2)
swap(page[0], page[1]);
return 0;
}
@ -574,12 +571,16 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
/* TODO: This is non obvious task to swap blocks for inodes with full
jornaling enabled */
/* TODO: it's not obvious how to swap blocks for inodes with full
journaling enabled */
if (ext4_should_journal_data(orig_inode) ||
ext4_should_journal_data(donor_inode)) {
return -EINVAL;
ext4_msg(orig_inode->i_sb, KERN_ERR,
"Online defrag not supported with data journaling");
return -EOPNOTSUPP;
}
/* Protect orig and donor inodes against a truncate */
lock_two_nondirectories(orig_inode, donor_inode);

File diff suppressed because it is too large Load Diff

View File

@ -84,7 +84,7 @@ static void ext4_finish_bio(struct bio *bio)
/* The bounce data pages are unmapped. */
data_page = page;
ctx = (struct ext4_crypto_ctx *)page_private(data_page);
page = ctx->control_page;
page = ctx->w.control_page;
}
#endif

View File

@ -54,8 +54,8 @@ static void completion_pages(struct work_struct *work)
{
#ifdef CONFIG_EXT4_FS_ENCRYPTION
struct ext4_crypto_ctx *ctx =
container_of(work, struct ext4_crypto_ctx, work);
struct bio *bio = ctx->bio;
container_of(work, struct ext4_crypto_ctx, r.work);
struct bio *bio = ctx->r.bio;
struct bio_vec *bv;
int i;
@ -109,9 +109,9 @@ static void mpage_end_io(struct bio *bio, int err)
if (err) {
ext4_release_crypto_ctx(ctx);
} else {
INIT_WORK(&ctx->work, completion_pages);
ctx->bio = bio;
queue_work(ext4_read_workqueue, &ctx->work);
INIT_WORK(&ctx->r.work, completion_pages);
ctx->r.bio = bio;
queue_work(ext4_read_workqueue, &ctx->r.work);
return;
}
}

View File

@ -591,14 +591,17 @@ void __ext4_msg(struct super_block *sb,
va_end(args);
}
#define ext4_warning_ratelimit(sb) \
___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), \
"EXT4-fs warning")
void __ext4_warning(struct super_block *sb, const char *function,
unsigned int line, const char *fmt, ...)
{
struct va_format vaf;
va_list args;
if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
"EXT4-fs warning"))
if (!ext4_warning_ratelimit(sb))
return;
va_start(args, fmt);
@ -609,6 +612,24 @@ void __ext4_warning(struct super_block *sb, const char *function,
va_end(args);
}
void __ext4_warning_inode(const struct inode *inode, const char *function,
unsigned int line, const char *fmt, ...)
{
struct va_format vaf;
va_list args;
if (!ext4_warning_ratelimit(inode->i_sb))
return;
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
"inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
function, line, inode->i_ino, current->comm, &vaf);
va_end(args);
}
void __ext4_grp_locked_error(const char *function, unsigned int line,
struct super_block *sb, ext4_group_t grp,
unsigned long ino, ext4_fsblk_t block,
@ -807,6 +828,7 @@ static void ext4_put_super(struct super_block *sb)
dump_orphan_list(sb, sbi);
J_ASSERT(list_empty(&sbi->s_orphan));
sync_blockdev(sb->s_bdev);
invalidate_bdev(sb->s_bdev);
if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
/*
@ -879,9 +901,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
atomic_set(&ei->i_unwritten, 0);
INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
#ifdef CONFIG_EXT4_FS_ENCRYPTION
ei->i_encryption_key.mode = EXT4_ENCRYPTION_MODE_INVALID;
ei->i_crypt_info = NULL;
#endif
return &ei->vfs_inode;
}
@ -958,6 +979,10 @@ void ext4_clear_inode(struct inode *inode)
jbd2_free_inode(EXT4_I(inode)->jinode);
EXT4_I(inode)->jinode = NULL;
}
#ifdef CONFIG_EXT4_FS_ENCRYPTION
if (EXT4_I(inode)->i_crypt_info)
ext4_free_encryption_info(inode, EXT4_I(inode)->i_crypt_info);
#endif
}
static struct inode *ext4_nfs_get_inode(struct super_block *sb,
@ -3449,11 +3474,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (sb->s_bdev->bd_part)
sbi->s_sectors_written_start =
part_stat_read(sb->s_bdev->bd_part, sectors[1]);
#ifdef CONFIG_EXT4_FS_ENCRYPTION
/* Modes of operations for file and directory encryption. */
sbi->s_file_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS;
sbi->s_dir_encryption_mode = EXT4_ENCRYPTION_MODE_INVALID;
#endif
/* Cleanup superblock name */
for (cp = sb->s_id; (cp = strchr(cp, '/'));)
@ -4067,7 +4087,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
}
if (unlikely(sbi->s_mount_flags & EXT4_MF_TEST_DUMMY_ENCRYPTION) &&
if ((DUMMY_ENCRYPTION_ENABLED(sbi) ||
EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT)) &&
(blocksize != PAGE_CACHE_SIZE)) {
ext4_msg(sb, KERN_ERR,
"Unsupported blocksize for fs encryption");
goto failed_mount_wq;
}
if (DUMMY_ENCRYPTION_ENABLED(sbi) &&
!(sb->s_flags & MS_RDONLY) &&
!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT)) {
EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT);
@ -4943,6 +4971,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
}
if (*flags & MS_LAZYTIME)
sb->s_flags |= MS_LAZYTIME;
if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
err = -EROFS;
@ -5410,6 +5441,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
struct inode *inode = sb_dqopt(sb)->files[type];
ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
int err, offset = off & (sb->s_blocksize - 1);
int retries = 0;
struct buffer_head *bh;
handle_t *handle = journal_current_handle();
@ -5430,7 +5462,12 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
return -EIO;
}
bh = ext4_bread(handle, inode, blk, 1);
do {
bh = ext4_bread(handle, inode, blk,
EXT4_GET_BLOCKS_CREATE |
EXT4_GET_BLOCKS_METADATA_NOFAIL);
} while (IS_ERR(bh) && (PTR_ERR(bh) == -ENOSPC) &&
ext4_should_retry_alloc(inode->i_sb, &retries));
if (IS_ERR(bh))
return PTR_ERR(bh);
if (!bh)
@ -5647,6 +5684,7 @@ static int __init ext4_init_fs(void)
static void __exit ext4_exit_fs(void)
{
ext4_exit_crypto();
ext4_destroy_lazyinit_thread();
unregister_as_ext2();
unregister_as_ext3();

View File

@ -23,31 +23,28 @@
#include "xattr.h"
#ifdef CONFIG_EXT4_FS_ENCRYPTION
static const char *ext4_follow_link(struct dentry *dentry, void **cookie)
static const char *ext4_encrypted_follow_link(struct dentry *dentry, void **cookie)
{
struct page *cpage = NULL;
char *caddr, *paddr = NULL;
struct ext4_str cstr, pstr;
struct inode *inode = d_inode(dentry);
struct ext4_fname_crypto_ctx *ctx = NULL;
struct ext4_encrypted_symlink_data *sd;
loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
int res;
u32 plen, max_size = inode->i_sb->s_blocksize;
ctx = ext4_get_fname_crypto_ctx(inode, inode->i_sb->s_blocksize);
if (IS_ERR(ctx))
return ERR_CAST(ctx);
res = ext4_get_encryption_info(inode);
if (res)
return ERR_PTR(res);
if (ext4_inode_is_fast_symlink(inode)) {
caddr = (char *) EXT4_I(inode)->i_data;
max_size = sizeof(EXT4_I(inode)->i_data);
} else {
cpage = read_mapping_page(inode->i_mapping, 0, NULL);
if (IS_ERR(cpage)) {
ext4_put_fname_crypto_ctx(&ctx);
if (IS_ERR(cpage))
return ERR_CAST(cpage);
}
caddr = kmap(cpage);
caddr[size] = 0;
}
@ -71,20 +68,19 @@ static const char *ext4_follow_link(struct dentry *dentry, void **cookie)
goto errout;
}
pstr.name = paddr;
res = _ext4_fname_disk_to_usr(ctx, NULL, &cstr, &pstr);
pstr.len = plen;
res = _ext4_fname_disk_to_usr(inode, NULL, &cstr, &pstr);
if (res < 0)
goto errout;
/* Null-terminate the name */
if (res <= plen)
paddr[res] = '\0';
ext4_put_fname_crypto_ctx(&ctx);
if (cpage) {
kunmap(cpage);
page_cache_release(cpage);
}
return *cookie = paddr;
errout:
ext4_put_fname_crypto_ctx(&ctx);
if (cpage) {
kunmap(cpage);
page_cache_release(cpage);
@ -95,7 +91,7 @@ static const char *ext4_follow_link(struct dentry *dentry, void **cookie)
const struct inode_operations ext4_encrypted_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = ext4_follow_link,
.follow_link = ext4_encrypted_follow_link,
.put_link = kfree_put_link,
.setattr = ext4_setattr,
.setxattr = generic_setxattr,

View File

@ -390,7 +390,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
unsigned long blocknr;
if (is_journal_aborted(journal))
return 1;
return -EIO;
if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
return 1;
@ -405,10 +405,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
* jbd2_cleanup_journal_tail() doesn't get called all that often.
*/
if (journal->j_flags & JBD2_BARRIER)
blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL);
__jbd2_update_log_tail(journal, first_tid, blocknr);
return 0;
return __jbd2_update_log_tail(journal, first_tid, blocknr);
}

View File

@ -371,16 +371,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
*/
J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
retry_alloc:
new_bh = alloc_buffer_head(GFP_NOFS);
if (!new_bh) {
/*
* Failure is not an option, but __GFP_NOFAIL is going
* away; so we retry ourselves here.
*/
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry_alloc;
}
new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
/* keep subsequent assertions sane */
atomic_set(&new_bh->b_count, 1);
@ -885,9 +876,10 @@ int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
*
* Requires j_checkpoint_mutex
*/
void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
{
unsigned long freed;
int ret;
BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
@ -897,7 +889,10 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
* space and if we lose sb update during power failure we'd replay
* old transaction with possibly newly overwritten data.
*/
jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
ret = jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
if (ret)
goto out;
write_lock(&journal->j_state_lock);
freed = block - journal->j_tail;
if (block < journal->j_tail)
@ -913,6 +908,9 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
journal->j_tail_sequence = tid;
journal->j_tail = block;
write_unlock(&journal->j_state_lock);
out:
return ret;
}
/*
@ -1331,7 +1329,7 @@ static int journal_reset(journal_t *journal)
return jbd2_journal_start_thread(journal);
}
static void jbd2_write_superblock(journal_t *journal, int write_op)
static int jbd2_write_superblock(journal_t *journal, int write_op)
{
struct buffer_head *bh = journal->j_sb_buffer;
journal_superblock_t *sb = journal->j_superblock;
@ -1370,7 +1368,10 @@ static void jbd2_write_superblock(journal_t *journal, int write_op)
printk(KERN_ERR "JBD2: Error %d detected when updating "
"journal superblock for %s.\n", ret,
journal->j_devname);
jbd2_journal_abort(journal, ret);
}
return ret;
}
/**
@ -1383,10 +1384,11 @@ static void jbd2_write_superblock(journal_t *journal, int write_op)
* Update a journal's superblock information about log tail and write it to
* disk, waiting for the IO to complete.
*/
void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
unsigned long tail_block, int write_op)
{
journal_superblock_t *sb = journal->j_superblock;
int ret;
BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
@ -1395,13 +1397,18 @@ void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
sb->s_sequence = cpu_to_be32(tail_tid);
sb->s_start = cpu_to_be32(tail_block);
jbd2_write_superblock(journal, write_op);
ret = jbd2_write_superblock(journal, write_op);
if (ret)
goto out;
/* Log is no longer empty */
write_lock(&journal->j_state_lock);
WARN_ON(!sb->s_sequence);
journal->j_flags &= ~JBD2_FLUSHED;
write_unlock(&journal->j_state_lock);
out:
return ret;
}
/**
@ -1950,7 +1957,14 @@ int jbd2_journal_flush(journal_t *journal)
return -EIO;
mutex_lock(&journal->j_checkpoint_mutex);
jbd2_cleanup_journal_tail(journal);
if (!err) {
err = jbd2_cleanup_journal_tail(journal);
if (err < 0) {
mutex_unlock(&journal->j_checkpoint_mutex);
goto out;
}
err = 0;
}
/* Finally, mark the journal as really needing no recovery.
* This sets s_start==0 in the underlying superblock, which is
@ -1966,7 +1980,8 @@ int jbd2_journal_flush(journal_t *journal)
J_ASSERT(journal->j_head == journal->j_tail);
J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
write_unlock(&journal->j_state_lock);
return 0;
out:
return err;
}
/**
@ -2330,7 +2345,7 @@ static int jbd2_journal_init_journal_head_cache(void)
jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head",
sizeof(struct journal_head),
0, /* offset */
SLAB_TEMPORARY, /* flags */
SLAB_TEMPORARY | SLAB_DESTROY_BY_RCU,
NULL); /* ctor */
retval = 0;
if (!jbd2_journal_head_cache) {
@ -2362,10 +2377,8 @@ static struct journal_head *journal_alloc_journal_head(void)
if (!ret) {
jbd_debug(1, "out of memory for journal_head\n");
pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__);
while (!ret) {
yield();
ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS);
}
ret = kmem_cache_zalloc(jbd2_journal_head_cache,
GFP_NOFS | __GFP_NOFAIL);
}
return ret;
}

View File

@ -141,11 +141,13 @@ static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr,
{
struct list_head *hash_list;
struct jbd2_revoke_record_s *record;
gfp_t gfp_mask = GFP_NOFS;
repeat:
record = kmem_cache_alloc(jbd2_revoke_record_cache, GFP_NOFS);
if (journal_oom_retry)
gfp_mask |= __GFP_NOFAIL;
record = kmem_cache_alloc(jbd2_revoke_record_cache, gfp_mask);
if (!record)
goto oom;
return -ENOMEM;
record->sequence = seq;
record->blocknr = blocknr;
@ -154,13 +156,6 @@ static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr,
list_add(&record->hash, hash_list);
spin_unlock(&journal->j_revoke_lock);
return 0;
oom:
if (!journal_oom_retry)
return -ENOMEM;
jbd_debug(1, "ENOMEM in %s, retrying\n", __func__);
yield();
goto repeat;
}
/* Find a revoke record in the journal's hash table. */

View File

@ -278,22 +278,16 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
alloc_transaction:
if (!journal->j_running_transaction) {
/*
* If __GFP_FS is not present, then we may be being called from
* inside the fs writeback layer, so we MUST NOT fail.
*/
if ((gfp_mask & __GFP_FS) == 0)
gfp_mask |= __GFP_NOFAIL;
new_transaction = kmem_cache_zalloc(transaction_cache,
gfp_mask);
if (!new_transaction) {
/*
* If __GFP_FS is not present, then we may be
* being called from inside the fs writeback
* layer, so we MUST NOT fail. Since
* __GFP_NOFAIL is going away, we will arrange
* to retry the allocation ourselves.
*/
if ((gfp_mask & __GFP_FS) == 0) {
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto alloc_transaction;
}
if (!new_transaction)
return -ENOMEM;
}
}
jbd_debug(3, "New handle %p going live.\n", handle);
@ -761,6 +755,30 @@ static void warn_dirty_buffer(struct buffer_head *bh)
bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
}
/*
 * Snapshot the buffer's current contents into jh->b_frozen_data, firing the
 * frozen trigger on the live data immediately before the copy is taken.
 */
static void jbd2_freeze_jh_data(struct journal_head *jh)
{
	struct buffer_head *bh = jh2bh(jh);
	char *mapped;
	char *data;

	J_EXPECT_JH(jh, buffer_uptodate(bh), "Possible IO failure.\n");

	/* Map the buffer's page and locate the buffer data inside it. */
	mapped = kmap_atomic(bh->b_page);
	data = mapped + offset_in_page(bh->b_data);

	/* The trigger runs on the data just before it is copied. */
	jbd2_buffer_frozen_trigger(jh, data, jh->b_triggers);
	memcpy(jh->b_frozen_data, data, bh->b_size);
	kunmap_atomic(mapped);

	/* The frozen copy is saved; record the triggers that belong to it. */
	jh->b_frozen_triggers = jh->b_triggers;
}
/*
* If the buffer is already part of the current transaction, then there
* is nothing we need to do. If it is already part of a prior
@ -780,7 +798,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
journal_t *journal;
int error;
char *frozen_buffer = NULL;
int need_copy = 0;
unsigned long start_lock, time_lock;
if (is_handle_aborted(handle))
@ -866,6 +883,26 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
*/
jh->b_modified = 0;
/*
* If the buffer is not journaled right now, we need to make sure it
* doesn't get written to disk before the caller actually commits the
* new data
*/
if (!jh->b_transaction) {
JBUFFER_TRACE(jh, "no transaction");
J_ASSERT_JH(jh, !jh->b_next_transaction);
JBUFFER_TRACE(jh, "file as BJ_Reserved");
/*
* Make sure all stores to jh (b_modified, b_frozen_data) are
* visible before attaching it to the running transaction.
* Paired with barrier in jbd2_write_access_granted()
*/
smp_wmb();
spin_lock(&journal->j_list_lock);
__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
spin_unlock(&journal->j_list_lock);
goto done;
}
/*
* If there is already a copy-out version of this buffer, then we don't
* need to make another one
@ -873,113 +910,70 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
if (jh->b_frozen_data) {
JBUFFER_TRACE(jh, "has frozen data");
J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
jh->b_next_transaction = transaction;
goto done;
}
/* Is there data here we need to preserve? */
if (jh->b_transaction && jh->b_transaction != transaction) {
JBUFFER_TRACE(jh, "owned by older transaction");
J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
J_ASSERT_JH(jh, jh->b_transaction ==
journal->j_committing_transaction);
/* There is one case we have to be very careful about.
* If the committing transaction is currently writing
* this buffer out to disk and has NOT made a copy-out,
* then we cannot modify the buffer contents at all
* right now. The essence of copy-out is that it is the
* extra copy, not the primary copy, which gets
* journaled. If the primary copy is already going to
* disk then we cannot do copy-out here. */
if (buffer_shadow(bh)) {
JBUFFER_TRACE(jh, "on shadow: sleep");
jbd_unlock_bh_state(bh);
wait_on_bit_io(&bh->b_state, BH_Shadow,
TASK_UNINTERRUPTIBLE);
goto repeat;
}
/*
* Only do the copy if the currently-owning transaction still
* needs it. If buffer isn't on BJ_Metadata list, the
* committing transaction is past that stage (here we use the
* fact that BH_Shadow is set under bh_state lock together with
* refiling to BJ_Shadow list and at this point we know the
* buffer doesn't have BH_Shadow set).
*
* Subtle point, though: if this is a get_undo_access,
* then we will be relying on the frozen_data to contain
* the new value of the committed_data record after the
* transaction, so we HAVE to force the frozen_data copy
* in that case.
*/
if (jh->b_jlist == BJ_Metadata || force_copy) {
JBUFFER_TRACE(jh, "generate frozen data");
if (!frozen_buffer) {
JBUFFER_TRACE(jh, "allocate memory for buffer");
jbd_unlock_bh_state(bh);
frozen_buffer =
jbd2_alloc(jh2bh(jh)->b_size,
GFP_NOFS);
if (!frozen_buffer) {
printk(KERN_ERR
"%s: OOM for frozen_buffer\n",
__func__);
JBUFFER_TRACE(jh, "oom!");
error = -ENOMEM;
jbd_lock_bh_state(bh);
goto done;
}
goto repeat;
}
jh->b_frozen_data = frozen_buffer;
frozen_buffer = NULL;
need_copy = 1;
}
jh->b_next_transaction = transaction;
goto attach_next;
}
JBUFFER_TRACE(jh, "owned by older transaction");
J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
J_ASSERT_JH(jh, jh->b_transaction == journal->j_committing_transaction);
/*
* Finally, if the buffer is not journaled right now, we need to make
* sure it doesn't get written to disk before the caller actually
* commits the new data
* There is one case we have to be very careful about. If the
* committing transaction is currently writing this buffer out to disk
* and has NOT made a copy-out, then we cannot modify the buffer
* contents at all right now. The essence of copy-out is that it is
* the extra copy, not the primary copy, which gets journaled. If the
* primary copy is already going to disk then we cannot do copy-out
* here.
*/
if (!jh->b_transaction) {
JBUFFER_TRACE(jh, "no transaction");
J_ASSERT_JH(jh, !jh->b_next_transaction);
JBUFFER_TRACE(jh, "file as BJ_Reserved");
spin_lock(&journal->j_list_lock);
__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
spin_unlock(&journal->j_list_lock);
if (buffer_shadow(bh)) {
JBUFFER_TRACE(jh, "on shadow: sleep");
jbd_unlock_bh_state(bh);
wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE);
goto repeat;
}
/*
* Only do the copy if the currently-owning transaction still needs it.
* If buffer isn't on BJ_Metadata list, the committing transaction is
* past that stage (here we use the fact that BH_Shadow is set under
* bh_state lock together with refiling to BJ_Shadow list and at this
* point we know the buffer doesn't have BH_Shadow set).
*
* Subtle point, though: if this is a get_undo_access, then we will be
* relying on the frozen_data to contain the new value of the
* committed_data record after the transaction, so we HAVE to force the
* frozen_data copy in that case.
*/
if (jh->b_jlist == BJ_Metadata || force_copy) {
JBUFFER_TRACE(jh, "generate frozen data");
if (!frozen_buffer) {
JBUFFER_TRACE(jh, "allocate memory for buffer");
jbd_unlock_bh_state(bh);
frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
if (!frozen_buffer) {
printk(KERN_ERR "%s: OOM for frozen_buffer\n",
__func__);
JBUFFER_TRACE(jh, "oom!");
error = -ENOMEM;
goto out;
}
goto repeat;
}
jh->b_frozen_data = frozen_buffer;
frozen_buffer = NULL;
jbd2_freeze_jh_data(jh);
}
attach_next:
/*
* Make sure all stores to jh (b_modified, b_frozen_data) are visible
* before attaching it to the running transaction. Paired with barrier
* in jbd2_write_access_granted()
*/
smp_wmb();
jh->b_next_transaction = transaction;
done:
if (need_copy) {
struct page *page;
int offset;
char *source;
J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)),
"Possible IO failure.\n");
page = jh2bh(jh)->b_page;
offset = offset_in_page(jh2bh(jh)->b_data);
source = kmap_atomic(page);
/* Fire data frozen trigger just before we copy the data */
jbd2_buffer_frozen_trigger(jh, source + offset,
jh->b_triggers);
memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
kunmap_atomic(source);
/*
* Now that the frozen data is saved off, we need to store
* any matching triggers.
*/
jh->b_frozen_triggers = jh->b_triggers;
}
jbd_unlock_bh_state(bh);
/*
@ -996,6 +990,55 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
return error;
}
/*
 * Fast check whether buffer is already attached to the required transaction.
 *
 * Returns true only when bh is not dirty, has a journal head attached, that
 * journal head's b_transaction or b_next_transaction equals
 * handle->h_transaction, and the journal head still points back at bh.
 * Returns false in every other case. The whole check runs under
 * rcu_read_lock().
 */
static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh)
{
struct journal_head *jh;
bool ret = false;
/* Dirty buffers require special handling... */
if (buffer_dirty(bh))
return false;
/*
* RCU protects us from dereferencing freed pages. So the checks we do
* are guaranteed not to oops. However the jh slab object can get freed
* & reallocated while we work with it. So we have to be careful. When
* we see jh attached to the running transaction, we know it must stay
* so until the transaction is committed. Thus jh won't be freed and
* will be attached to the same bh while we run. However it can
* happen jh gets freed, reallocated, and attached to the transaction
* just after we get pointer to it from bh. So we have to be careful
* and recheck jh still belongs to our bh before we return success.
*/
rcu_read_lock();
/* No journal head attached to this buffer: nothing to check. */
if (!buffer_jbd(bh))
goto out;
/* This should be bh2jh() but that doesn't work with inline functions */
jh = READ_ONCE(bh->b_private);
if (!jh)
goto out;
/* jh must be attached to the handle's transaction, as current or next. */
if (jh->b_transaction != handle->h_transaction &&
jh->b_next_transaction != handle->h_transaction)
goto out;
/*
* There are two reasons for the barrier here:
* 1) Make sure to fetch b_bh after we did previous checks so that we
* detect when jh went through free, realloc, attach to transaction
* while we were checking. Paired with implicit barrier in that path.
* 2) So that access to bh done after jbd2_write_access_granted()
* doesn't get reordered and see inconsistent state of concurrent
* do_get_write_access().
*/
smp_mb();
/* Recheck that jh still belongs to our bh (see the RCU comment above). */
if (unlikely(jh->b_bh != bh))
goto out;
ret = true;
out:
rcu_read_unlock();
return ret;
}
/**
* int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
* @handle: transaction to add buffer modifications to
@ -1009,9 +1052,13 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
{
struct journal_head *jh = jbd2_journal_add_journal_head(bh);
struct journal_head *jh;
int rc;
if (jbd2_write_access_granted(handle, bh))
return 0;
jh = jbd2_journal_add_journal_head(bh);
/* We do not want to get caught playing with fields which the
* log thread also manipulates. Make sure that the buffer
* completes any outstanding IO before proceeding. */
@ -1141,11 +1188,14 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
{
int err;
struct journal_head *jh = jbd2_journal_add_journal_head(bh);
struct journal_head *jh;
char *committed_data = NULL;
JBUFFER_TRACE(jh, "entry");
if (jbd2_write_access_granted(handle, bh))
return 0;
jh = jbd2_journal_add_journal_head(bh);
/*
* Do this first --- it can drop the journal lock, so we want to
* make sure that obtaining the committed_data is done
@ -1230,8 +1280,6 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh,
triggers->t_abort(triggers, jh2bh(jh));
}
/**
* int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
* @handle: transaction to add buffer to.
@ -1264,12 +1312,36 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
if (is_handle_aborted(handle))
return -EROFS;
journal = transaction->t_journal;
jh = jbd2_journal_grab_journal_head(bh);
if (!jh) {
if (!buffer_jbd(bh)) {
ret = -EUCLEAN;
goto out;
}
/*
* We don't grab jh reference here since the buffer must be part
* of the running transaction.
*/
jh = bh2jh(bh);
J_ASSERT_JH(jh, jh->b_transaction == transaction ||
jh->b_next_transaction == transaction);
if (jh->b_modified == 1) {
/*
* If it's in our transaction it must be in BJ_Metadata list.
* The assertion is unreliable since we may see jh in
* inconsistent state unless we grab bh_state lock. But this
* is crucial to catch bugs so let's do a reliable check until
* the lockless handling is fully proven.
*/
if (jh->b_transaction == transaction &&
jh->b_jlist != BJ_Metadata) {
jbd_lock_bh_state(bh);
J_ASSERT_JH(jh, jh->b_transaction != transaction ||
jh->b_jlist == BJ_Metadata);
jbd_unlock_bh_state(bh);
}
goto out;
}
journal = transaction->t_journal;
jbd_debug(5, "journal_head %p\n", jh);
JBUFFER_TRACE(jh, "entry");
@ -1360,7 +1432,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
spin_unlock(&journal->j_list_lock);
out_unlock_bh:
jbd_unlock_bh_state(bh);
jbd2_journal_put_journal_head(jh);
out:
JBUFFER_TRACE(jh, "exit");
return ret;

View File

@ -1035,7 +1035,7 @@ struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal);
int jbd2_journal_next_log_block(journal_t *, unsigned long long *);
int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
unsigned long *block);
void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
/* Commit management */
@ -1157,7 +1157,7 @@ extern int jbd2_journal_recover (journal_t *journal);
extern int jbd2_journal_wipe (journal_t *, int);
extern int jbd2_journal_skip_recovery (journal_t *);
extern void jbd2_journal_update_sb_errno(journal_t *);
extern void jbd2_journal_update_sb_log_tail (journal_t *, tid_t,
extern int jbd2_journal_update_sb_log_tail (journal_t *, tid_t,
unsigned long, int);
extern void __jbd2_journal_abort_hard (journal_t *);
extern void jbd2_journal_abort (journal_t *, int);

View File

@ -1185,15 +1185,14 @@ TRACE_EVENT(ext4_da_update_reserve_space,
);
TRACE_EVENT(ext4_da_reserve_space,
TP_PROTO(struct inode *inode, int md_needed),
TP_PROTO(struct inode *inode),
TP_ARGS(inode, md_needed),
TP_ARGS(inode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, i_blocks )
__field( int, md_needed )
__field( int, reserved_data_blocks )
__field( int, reserved_meta_blocks )
__field( __u16, mode )
@ -1203,18 +1202,17 @@ TRACE_EVENT(ext4_da_reserve_space,
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->i_blocks = inode->i_blocks;
__entry->md_needed = md_needed;
__entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
__entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
__entry->mode = inode->i_mode;
),
TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu md_needed %d "
TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu "
"reserved_data_blocks %d reserved_meta_blocks %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->mode, __entry->i_blocks,
__entry->md_needed, __entry->reserved_data_blocks,
__entry->reserved_data_blocks,
__entry->reserved_meta_blocks)
);
@ -2478,6 +2476,31 @@ TRACE_EVENT(ext4_collapse_range,
__entry->offset, __entry->len)
);
/*
 * ext4_insert_range tracepoint: records the device and inode number of the
 * given inode together with the offset and len arguments, and prints them as
 * "dev MAJOR,MINOR ino N offset O len L".
 */
TRACE_EVENT(ext4_insert_range,
TP_PROTO(struct inode *inode, loff_t offset, loff_t len),
TP_ARGS(inode, offset, len),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(ino_t, ino)
__field(loff_t, offset)
__field(loff_t, len)
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->offset = offset;
__entry->len = len;
),
TP_printk("dev %d,%d ino %lu offset %lld len %lld",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->offset, __entry->len)
);
TRACE_EVENT(ext4_es_shrink,
TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time,
int nr_skipped, int retried),