Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "A very large number of cleanups and bug fixes --- in particular for
  the ext4 encryption patches, which are a new feature added in the last
  merge window.  Also fix a number of long-standing xfstest failures
  (quota writes failing due to ENOSPC, a race between truncate and
  writepage in data=journalled mode that was causing generic/068 to
  fail, and other corner cases).  Also add support for
  FALLOC_FL_INSERT_RANGE, and improve jbd2 performance by eliminating
  locking when a buffer is modified more than once during a transaction
  (which is very common for allocation bitmaps, for example), in which
  case the state of the journalled buffer head doesn't need to change"

[ I renamed "ext4_follow_link()" to "ext4_encrypted_follow_link()" in
  the merge resolution, to make it clear that that function is _only_
  used for encrypted symlinks.  The function doesn't actually work for
  non-encrypted symlinks at all, and they use the generic helpers
                                        - Linus ]

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (52 commits)
  ext4: set lazytime on remount if MS_LAZYTIME is set by mount
  ext4: only call ext4_truncate when size <= isize
  ext4: make online defrag error reporting consistent
  ext4: minor cleanup of ext4_da_reserve_space()
  ext4: don't retry file block mapping on bigalloc fs with non-extent file
  ext4: prevent ext4_quota_write() from failing due to ENOSPC
  ext4: call sync_blockdev() before invalidate_bdev() in put_super()
  jbd2: speedup jbd2_journal_dirty_metadata()
  jbd2: get rid of open coded allocation retry loop
  ext4: improve warning directory handling messages
  jbd2: fix ocfs2 corrupt when updating journal superblock fails
  ext4: mballoc: avoid 20-argument function call
  ext4: wait for existing dio workers in ext4_alloc_file_blocks()
  ext4: recalculate journal credits as inode depth changes
  jbd2: use GFP_NOFS in jbd2_cleanup_journal_tail()
  ext4: use swap() in mext_page_double_lock()
  ext4: use swap() in memswap()
  ext4: fix race between truncate and __ext4_journalled_writepage()
  ext4 crypto: fail the mount if blocksize != pagesize
  ext4: Add support FALLOC_FL_INSERT_RANGE for fallocate
  ...
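As background for the FALLOC_FL_INSERT_RANGE item in the pull summary, here is a minimal userspace sketch of the new fallocate mode (an illustration, not part of this merge): it inserts a block-aligned hole at a block-aligned offset, shifting the existing data toward the end of the file. Both offset and length must be multiples of the filesystem block size, and the file must be extent-mapped.

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <linux/falloc.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("testfile", O_RDWR);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* Insert a 4096-byte gap at offset 8192; existing data from
		 * offset 8192 onward shifts right by 4096 bytes. */
		if (fallocate(fd, FALLOC_FL_INSERT_RANGE, 8192, 4096) < 0)
			perror("fallocate");
		close(fd);
		return 0;
	}

The 8192/4096 values assume a 4k-block filesystem; adjust them to the block size of the filesystem you run this on.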
This commit is contained in: commit d857da7b70
fs/ext4/Kconfig
@@ -72,6 +72,7 @@ config EXT4_ENCRYPTION
 	select CRYPTO_ECB
 	select CRYPTO_XTS
 	select CRYPTO_CTS
+	select CRYPTO_CTR
 	select CRYPTO_SHA256
 	select KEYS
 	select ENCRYPTED_KEYS
fs/ext4/balloc.c
@@ -369,7 +369,7 @@ static void ext4_validate_block_bitmap(struct super_block *sb,
 	struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
-	if (buffer_verified(bh))
+	if (buffer_verified(bh) || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
 		return;
 
 	ext4_lock_group(sb, block_group);
@@ -446,7 +446,7 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
 		unlock_buffer(bh);
 		if (err)
 			ext4_error(sb, "Checksum bad for grp %u", block_group);
-		return bh;
+		goto verify;
 	}
 	ext4_unlock_group(sb, block_group);
 	if (buffer_uptodate(bh)) {
fs/ext4/crypto.c (211 lines changed)
@@ -55,6 +55,9 @@ static mempool_t *ext4_bounce_page_pool;
 static LIST_HEAD(ext4_free_crypto_ctxs);
 static DEFINE_SPINLOCK(ext4_crypto_ctx_lock);
 
+static struct kmem_cache *ext4_crypto_ctx_cachep;
+struct kmem_cache *ext4_crypt_info_cachep;
+
 /**
  * ext4_release_crypto_ctx() - Releases an encryption context
  * @ctx: The encryption context to release.
@@ -68,18 +71,12 @@ void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx)
 {
 	unsigned long flags;
 
-	if (ctx->bounce_page) {
-		if (ctx->flags & EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL)
-			__free_page(ctx->bounce_page);
-		else
-			mempool_free(ctx->bounce_page, ext4_bounce_page_pool);
-		ctx->bounce_page = NULL;
-	}
-	ctx->control_page = NULL;
+	if (ctx->flags & EXT4_WRITE_PATH_FL && ctx->w.bounce_page)
+		mempool_free(ctx->w.bounce_page, ext4_bounce_page_pool);
+	ctx->w.bounce_page = NULL;
+	ctx->w.control_page = NULL;
 	if (ctx->flags & EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL) {
-		if (ctx->tfm)
-			crypto_free_tfm(ctx->tfm);
-		kfree(ctx);
+		kmem_cache_free(ext4_crypto_ctx_cachep, ctx);
 	} else {
 		spin_lock_irqsave(&ext4_crypto_ctx_lock, flags);
 		list_add(&ctx->free_list, &ext4_free_crypto_ctxs);
@@ -87,23 +84,6 @@ void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx)
 	}
 }
 
-/**
- * ext4_alloc_and_init_crypto_ctx() - Allocates and inits an encryption context
- * @mask: The allocation mask.
- *
- * Return: An allocated and initialized encryption context on success. An error
- * value or NULL otherwise.
- */
-static struct ext4_crypto_ctx *ext4_alloc_and_init_crypto_ctx(gfp_t mask)
-{
-	struct ext4_crypto_ctx *ctx = kzalloc(sizeof(struct ext4_crypto_ctx),
-					      mask);
-
-	if (!ctx)
-		return ERR_PTR(-ENOMEM);
-	return ctx;
-}
-
 /**
  * ext4_get_crypto_ctx() - Gets an encryption context
  * @inode: The inode for which we are doing the crypto
@@ -118,10 +98,10 @@ struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode)
 	struct ext4_crypto_ctx *ctx = NULL;
 	int res = 0;
 	unsigned long flags;
-	struct ext4_encryption_key *key = &EXT4_I(inode)->i_encryption_key;
+	struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
 
-	if (!ext4_read_workqueue)
-		ext4_init_crypto();
+	if (ci == NULL)
+		return ERR_PTR(-ENOKEY);
 
 	/*
 	 * We first try getting the ctx from a free list because in
@@ -140,50 +120,16 @@ struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode)
 		list_del(&ctx->free_list);
 	spin_unlock_irqrestore(&ext4_crypto_ctx_lock, flags);
 	if (!ctx) {
-		ctx = ext4_alloc_and_init_crypto_ctx(GFP_NOFS);
-		if (IS_ERR(ctx)) {
-			res = PTR_ERR(ctx);
+		ctx = kmem_cache_zalloc(ext4_crypto_ctx_cachep, GFP_NOFS);
+		if (!ctx) {
+			res = -ENOMEM;
 			goto out;
 		}
 		ctx->flags |= EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL;
 	} else {
 		ctx->flags &= ~EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL;
 	}
-
-	/* Allocate a new Crypto API context if we don't already have
-	 * one or if it isn't the right mode. */
-	BUG_ON(key->mode == EXT4_ENCRYPTION_MODE_INVALID);
-	if (ctx->tfm && (ctx->mode != key->mode)) {
-		crypto_free_tfm(ctx->tfm);
-		ctx->tfm = NULL;
-		ctx->mode = EXT4_ENCRYPTION_MODE_INVALID;
-	}
-	if (!ctx->tfm) {
-		switch (key->mode) {
-		case EXT4_ENCRYPTION_MODE_AES_256_XTS:
-			ctx->tfm = crypto_ablkcipher_tfm(
-				crypto_alloc_ablkcipher("xts(aes)", 0, 0));
-			break;
-		case EXT4_ENCRYPTION_MODE_AES_256_GCM:
-			/* TODO(mhalcrow): AEAD w/ gcm(aes);
-			 * crypto_aead_setauthsize() */
-			ctx->tfm = ERR_PTR(-ENOTSUPP);
-			break;
-		default:
-			BUG();
-		}
-		if (IS_ERR_OR_NULL(ctx->tfm)) {
-			res = PTR_ERR(ctx->tfm);
-			ctx->tfm = NULL;
-			goto out;
-		}
-		ctx->mode = key->mode;
-	}
-	BUG_ON(key->size != ext4_encryption_key_size(key->mode));
-
-	/* There shouldn't be a bounce page attached to the crypto
-	 * context at this point. */
-	BUG_ON(ctx->bounce_page);
+	ctx->flags &= ~EXT4_WRITE_PATH_FL;
 
 out:
 	if (res) {
@@ -204,20 +150,8 @@ void ext4_exit_crypto(void)
 {
 	struct ext4_crypto_ctx *pos, *n;
 
-	list_for_each_entry_safe(pos, n, &ext4_free_crypto_ctxs, free_list) {
-		if (pos->bounce_page) {
-			if (pos->flags &
-			    EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL) {
-				__free_page(pos->bounce_page);
-			} else {
-				mempool_free(pos->bounce_page,
-					     ext4_bounce_page_pool);
-			}
-		}
-		if (pos->tfm)
-			crypto_free_tfm(pos->tfm);
-		kfree(pos);
-	}
+	list_for_each_entry_safe(pos, n, &ext4_free_crypto_ctxs, free_list)
+		kmem_cache_free(ext4_crypto_ctx_cachep, pos);
 	INIT_LIST_HEAD(&ext4_free_crypto_ctxs);
 	if (ext4_bounce_page_pool)
 		mempool_destroy(ext4_bounce_page_pool);
@@ -225,6 +159,12 @@ void ext4_exit_crypto(void)
 	if (ext4_read_workqueue)
 		destroy_workqueue(ext4_read_workqueue);
 	ext4_read_workqueue = NULL;
+	if (ext4_crypto_ctx_cachep)
+		kmem_cache_destroy(ext4_crypto_ctx_cachep);
+	ext4_crypto_ctx_cachep = NULL;
+	if (ext4_crypt_info_cachep)
+		kmem_cache_destroy(ext4_crypt_info_cachep);
+	ext4_crypt_info_cachep = NULL;
 }
 
 /**
@@ -237,23 +177,31 @@ void ext4_exit_crypto(void)
  */
 int ext4_init_crypto(void)
 {
-	int i, res;
+	int i, res = -ENOMEM;
 
 	mutex_lock(&crypto_init);
 	if (ext4_read_workqueue)
 		goto already_initialized;
 	ext4_read_workqueue = alloc_workqueue("ext4_crypto", WQ_HIGHPRI, 0);
-	if (!ext4_read_workqueue) {
-		res = -ENOMEM;
+	if (!ext4_read_workqueue)
 		goto fail;
-	}
+
+	ext4_crypto_ctx_cachep = KMEM_CACHE(ext4_crypto_ctx,
+					    SLAB_RECLAIM_ACCOUNT);
+	if (!ext4_crypto_ctx_cachep)
+		goto fail;
+
+	ext4_crypt_info_cachep = KMEM_CACHE(ext4_crypt_info,
+					    SLAB_RECLAIM_ACCOUNT);
+	if (!ext4_crypt_info_cachep)
+		goto fail;
 
 	for (i = 0; i < num_prealloc_crypto_ctxs; i++) {
 		struct ext4_crypto_ctx *ctx;
 
-		ctx = ext4_alloc_and_init_crypto_ctx(GFP_KERNEL);
-		if (IS_ERR(ctx)) {
-			res = PTR_ERR(ctx);
+		ctx = kmem_cache_zalloc(ext4_crypto_ctx_cachep, GFP_NOFS);
+		if (!ctx) {
+			res = -ENOMEM;
 			goto fail;
 		}
 		list_add(&ctx->free_list, &ext4_free_crypto_ctxs);
@@ -317,32 +265,11 @@ static int ext4_page_crypto(struct ext4_crypto_ctx *ctx,
 	struct ablkcipher_request *req = NULL;
 	DECLARE_EXT4_COMPLETION_RESULT(ecr);
 	struct scatterlist dst, src;
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	struct crypto_ablkcipher *atfm = __crypto_ablkcipher_cast(ctx->tfm);
+	struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
+	struct crypto_ablkcipher *tfm = ci->ci_ctfm;
 	int res = 0;
 
-	BUG_ON(!ctx->tfm);
-	BUG_ON(ctx->mode != ei->i_encryption_key.mode);
-
-	if (ctx->mode != EXT4_ENCRYPTION_MODE_AES_256_XTS) {
-		printk_ratelimited(KERN_ERR
-				   "%s: unsupported crypto algorithm: %d\n",
-				   __func__, ctx->mode);
-		return -ENOTSUPP;
-	}
-
-	crypto_ablkcipher_clear_flags(atfm, ~0);
-	crypto_tfm_set_flags(ctx->tfm, CRYPTO_TFM_REQ_WEAK_KEY);
-
-	res = crypto_ablkcipher_setkey(atfm, ei->i_encryption_key.raw,
-				       ei->i_encryption_key.size);
-	if (res) {
-		printk_ratelimited(KERN_ERR
-				   "%s: crypto_ablkcipher_setkey() failed\n",
-				   __func__);
-		return res;
-	}
-	req = ablkcipher_request_alloc(atfm, GFP_NOFS);
+	req = ablkcipher_request_alloc(tfm, GFP_NOFS);
 	if (!req) {
 		printk_ratelimited(KERN_ERR
 				   "%s: crypto_request_alloc() failed\n",
@@ -384,6 +311,15 @@ static int ext4_page_crypto(struct ext4_crypto_ctx *ctx,
 	return 0;
 }
 
+static struct page *alloc_bounce_page(struct ext4_crypto_ctx *ctx)
+{
+	ctx->w.bounce_page = mempool_alloc(ext4_bounce_page_pool, GFP_NOWAIT);
+	if (ctx->w.bounce_page == NULL)
+		return ERR_PTR(-ENOMEM);
+	ctx->flags |= EXT4_WRITE_PATH_FL;
+	return ctx->w.bounce_page;
+}
+
 /**
  * ext4_encrypt() - Encrypts a page
  * @inode: The inode for which the encryption should take place
@@ -413,27 +349,17 @@ struct page *ext4_encrypt(struct inode *inode,
 		return (struct page *) ctx;
 
 	/* The encryption operation will require a bounce page. */
-	ciphertext_page = alloc_page(GFP_NOFS);
-	if (!ciphertext_page) {
-		/* This is a potential bottleneck, but at least we'll have
-		 * forward progress. */
-		ciphertext_page = mempool_alloc(ext4_bounce_page_pool,
-						GFP_NOFS);
-		if (WARN_ON_ONCE(!ciphertext_page)) {
-			ciphertext_page = mempool_alloc(ext4_bounce_page_pool,
-							GFP_NOFS | __GFP_WAIT);
-		}
-		ctx->flags &= ~EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
-	} else {
-		ctx->flags |= EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
-	}
-	ctx->bounce_page = ciphertext_page;
-	ctx->control_page = plaintext_page;
+	ciphertext_page = alloc_bounce_page(ctx);
+	if (IS_ERR(ciphertext_page))
+		goto errout;
+	ctx->w.control_page = plaintext_page;
 	err = ext4_page_crypto(ctx, inode, EXT4_ENCRYPT, plaintext_page->index,
 			       plaintext_page, ciphertext_page);
 	if (err) {
+		ciphertext_page = ERR_PTR(err);
+	errout:
 		ext4_release_crypto_ctx(ctx);
-		return ERR_PTR(err);
+		return ciphertext_page;
 	}
 	SetPagePrivate(ciphertext_page);
 	set_page_private(ciphertext_page, (unsigned long)ctx);
@@ -470,8 +396,8 @@ int ext4_decrypt_one(struct inode *inode, struct page *page)
 
 	struct ext4_crypto_ctx *ctx = ext4_get_crypto_ctx(inode);
 
-	if (!ctx)
-		return -ENOMEM;
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
 	ret = ext4_decrypt(ctx, page);
 	ext4_release_crypto_ctx(ctx);
 	return ret;
@@ -493,21 +419,11 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
-	ciphertext_page = alloc_page(GFP_NOFS);
-	if (!ciphertext_page) {
-		/* This is a potential bottleneck, but at least we'll have
-		 * forward progress. */
-		ciphertext_page = mempool_alloc(ext4_bounce_page_pool,
-						GFP_NOFS);
-		if (WARN_ON_ONCE(!ciphertext_page)) {
-			ciphertext_page = mempool_alloc(ext4_bounce_page_pool,
-							GFP_NOFS | __GFP_WAIT);
-		}
-		ctx->flags &= ~EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
-	} else {
-		ctx->flags |= EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
+	ciphertext_page = alloc_bounce_page(ctx);
+	if (IS_ERR(ciphertext_page)) {
+		err = PTR_ERR(ciphertext_page);
+		goto errout;
 	}
-	ctx->bounce_page = ciphertext_page;
 
 	while (len--) {
 		err = ext4_page_crypto(ctx, inode, EXT4_ENCRYPT, lblk,
@@ -529,6 +445,7 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
 			goto errout;
 		}
 		err = submit_bio_wait(WRITE, bio);
+		bio_put(bio);
 		if (err)
 			goto errout;
 	}
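Taken together, the crypto.c hunks above replace ad-hoc kzalloc()/alloc_page() calls with a slab cache for contexts and a mempool for bounce pages; a mempool's preallocated reserve is what lets writeback keep making forward progress under memory pressure. A standalone sketch of that allocation pattern follows (hypothetical module code, not part of the diff; mempool_create_page_pool(), mempool_alloc() and mempool_free() are the real kernel APIs):

	#include <linux/init.h>
	#include <linux/mempool.h>
	#include <linux/gfp.h>

	static mempool_t *bounce_pool;

	static int __init bounce_pool_init(void)
	{
		/* Keep 32 order-0 pages in reserve; allocations can dip
		 * into this reserve when the page allocator comes up empty. */
		bounce_pool = mempool_create_page_pool(32, 0);
		return bounce_pool ? 0 : -ENOMEM;
	}

	static struct page *get_bounce_page(void)
	{
		/* GFP_NOWAIT mirrors alloc_bounce_page() in the diff:
		 * the caller handles a NULL return instead of sleeping. */
		return mempool_alloc(bounce_pool, GFP_NOWAIT);
	}

	static void put_bounce_page(struct page *page)
	{
		mempool_free(page, bounce_pool);
	}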
fs/ext4/crypto_fname.c
@@ -48,6 +48,12 @@ bool ext4_valid_filenames_enc_mode(uint32_t mode)
 	return (mode == EXT4_ENCRYPTION_MODE_AES_256_CTS);
 }
 
+static unsigned max_name_len(struct inode *inode)
+{
+	return S_ISLNK(inode->i_mode) ? inode->i_sb->s_blocksize :
+		EXT4_NAME_LEN;
+}
+
 /**
  * ext4_fname_encrypt() -
  *
@@ -55,43 +61,52 @@ bool ext4_valid_filenames_enc_mode(uint32_t mode)
  * ciphertext. Errors are returned as negative numbers. We trust the caller to
  * allocate sufficient memory to oname string.
  */
-static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx,
+static int ext4_fname_encrypt(struct inode *inode,
 			      const struct qstr *iname,
 			      struct ext4_str *oname)
 {
 	u32 ciphertext_len;
 	struct ablkcipher_request *req = NULL;
 	DECLARE_EXT4_COMPLETION_RESULT(ecr);
-	struct crypto_ablkcipher *tfm = ctx->ctfm;
+	struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
+	struct crypto_ablkcipher *tfm = ci->ci_ctfm;
 	int res = 0;
 	char iv[EXT4_CRYPTO_BLOCK_SIZE];
-	struct scatterlist sg[1];
-	int padding = 4 << (ctx->flags & EXT4_POLICY_FLAGS_PAD_MASK);
-	char *workbuf;
+	struct scatterlist src_sg, dst_sg;
+	int padding = 4 << (ci->ci_flags & EXT4_POLICY_FLAGS_PAD_MASK);
+	char *workbuf, buf[32], *alloc_buf = NULL;
+	unsigned lim = max_name_len(inode);
 
-	if (iname->len <= 0 || iname->len > ctx->lim)
+	if (iname->len <= 0 || iname->len > lim)
 		return -EIO;
 
 	ciphertext_len = (iname->len < EXT4_CRYPTO_BLOCK_SIZE) ?
 		EXT4_CRYPTO_BLOCK_SIZE : iname->len;
 	ciphertext_len = ext4_fname_crypto_round_up(ciphertext_len, padding);
-	ciphertext_len = (ciphertext_len > ctx->lim)
-			? ctx->lim : ciphertext_len;
+	ciphertext_len = (ciphertext_len > lim)
+			? lim : ciphertext_len;
+
+	if (ciphertext_len <= sizeof(buf)) {
+		workbuf = buf;
+	} else {
+		alloc_buf = kmalloc(ciphertext_len, GFP_NOFS);
+		if (!alloc_buf)
+			return -ENOMEM;
+		workbuf = alloc_buf;
+	}
 
 	/* Allocate request */
 	req = ablkcipher_request_alloc(tfm, GFP_NOFS);
 	if (!req) {
 		printk_ratelimited(
 		    KERN_ERR "%s: crypto_request_alloc() failed\n", __func__);
+		kfree(alloc_buf);
 		return -ENOMEM;
 	}
 	ablkcipher_request_set_callback(req,
 		CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
 		ext4_dir_crypt_complete, &ecr);
 
-	/* Map the workpage */
-	workbuf = kmap(ctx->workpage);
-
 	/* Copy the input */
 	memcpy(workbuf, iname->name, iname->len);
 	if (iname->len < ciphertext_len)
@@ -101,21 +116,16 @@ static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx,
 	memset(iv, 0, EXT4_CRYPTO_BLOCK_SIZE);
 
 	/* Create encryption request */
-	sg_init_table(sg, 1);
-	sg_set_page(sg, ctx->workpage, PAGE_SIZE, 0);
-	ablkcipher_request_set_crypt(req, sg, sg, ciphertext_len, iv);
+	sg_init_one(&src_sg, workbuf, ciphertext_len);
+	sg_init_one(&dst_sg, oname->name, ciphertext_len);
+	ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv);
 	res = crypto_ablkcipher_encrypt(req);
 	if (res == -EINPROGRESS || res == -EBUSY) {
 		BUG_ON(req->base.data != &ecr);
 		wait_for_completion(&ecr.completion);
 		res = ecr.res;
 	}
-	if (res >= 0) {
-		/* Copy the result to output */
-		memcpy(oname->name, workbuf, ciphertext_len);
-		res = ciphertext_len;
-	}
-	kunmap(ctx->workpage);
+	kfree(alloc_buf);
 	ablkcipher_request_free(req);
 	if (res < 0) {
 		printk_ratelimited(
@@ -132,20 +142,21 @@ static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx,
  * Errors are returned as negative numbers.
  * We trust the caller to allocate sufficient memory to oname string.
  */
-static int ext4_fname_decrypt(struct ext4_fname_crypto_ctx *ctx,
+static int ext4_fname_decrypt(struct inode *inode,
 			      const struct ext4_str *iname,
 			      struct ext4_str *oname)
 {
 	struct ext4_str tmp_in[2], tmp_out[1];
 	struct ablkcipher_request *req = NULL;
 	DECLARE_EXT4_COMPLETION_RESULT(ecr);
-	struct scatterlist sg[1];
-	struct crypto_ablkcipher *tfm = ctx->ctfm;
+	struct scatterlist src_sg, dst_sg;
+	struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
+	struct crypto_ablkcipher *tfm = ci->ci_ctfm;
 	int res = 0;
 	char iv[EXT4_CRYPTO_BLOCK_SIZE];
-	char *workbuf;
+	unsigned lim = max_name_len(inode);
 
-	if (iname->len <= 0 || iname->len > ctx->lim)
+	if (iname->len <= 0 || iname->len > lim)
 		return -EIO;
 
 	tmp_in[0].name = iname->name;
@@ -163,31 +174,19 @@ static int ext4_fname_decrypt(struct ext4_fname_crypto_ctx *ctx,
 		CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
 		ext4_dir_crypt_complete, &ecr);
 
-	/* Map the workpage */
-	workbuf = kmap(ctx->workpage);
-
-	/* Copy the input */
-	memcpy(workbuf, iname->name, iname->len);
-
 	/* Initialize IV */
 	memset(iv, 0, EXT4_CRYPTO_BLOCK_SIZE);
 
 	/* Create encryption request */
-	sg_init_table(sg, 1);
-	sg_set_page(sg, ctx->workpage, PAGE_SIZE, 0);
-	ablkcipher_request_set_crypt(req, sg, sg, iname->len, iv);
+	sg_init_one(&src_sg, iname->name, iname->len);
+	sg_init_one(&dst_sg, oname->name, oname->len);
+	ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv);
 	res = crypto_ablkcipher_decrypt(req);
 	if (res == -EINPROGRESS || res == -EBUSY) {
 		BUG_ON(req->base.data != &ecr);
 		wait_for_completion(&ecr.completion);
 		res = ecr.res;
 	}
-	if (res >= 0) {
-		/* Copy the result to output */
-		memcpy(oname->name, workbuf, iname->len);
-		res = iname->len;
-	}
-	kunmap(ctx->workpage);
 	ablkcipher_request_free(req);
 	if (res < 0) {
 		printk_ratelimited(
@@ -253,207 +252,6 @@ static int digest_decode(const char *src, int len, char *dst)
 	return cp - dst;
 }
 
-/**
- * ext4_free_fname_crypto_ctx() -
- *
- * Frees up a crypto context.
- */
-void ext4_free_fname_crypto_ctx(struct ext4_fname_crypto_ctx *ctx)
-{
-	if (ctx == NULL || IS_ERR(ctx))
-		return;
-
-	if (ctx->ctfm && !IS_ERR(ctx->ctfm))
-		crypto_free_ablkcipher(ctx->ctfm);
-	if (ctx->htfm && !IS_ERR(ctx->htfm))
-		crypto_free_hash(ctx->htfm);
-	if (ctx->workpage && !IS_ERR(ctx->workpage))
-		__free_page(ctx->workpage);
-	kfree(ctx);
-}
-
-/**
- * ext4_put_fname_crypto_ctx() -
- *
- * Return: The crypto context onto free list. If the free list is above a
- * threshold, completely frees up the context, and returns the memory.
- *
- * TODO: Currently we directly free the crypto context. Eventually we should
- * add code it to return to free list. Such an approach will increase
- * efficiency of directory lookup.
- */
-void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx)
-{
-	if (*ctx == NULL || IS_ERR(*ctx))
-		return;
-	ext4_free_fname_crypto_ctx(*ctx);
-	*ctx = NULL;
-}
-
-/**
- * ext4_search_fname_crypto_ctx() -
- */
-static struct ext4_fname_crypto_ctx *ext4_search_fname_crypto_ctx(
-		const struct ext4_encryption_key *key)
-{
-	return NULL;
-}
-
-/**
- * ext4_alloc_fname_crypto_ctx() -
- */
-struct ext4_fname_crypto_ctx *ext4_alloc_fname_crypto_ctx(
-		const struct ext4_encryption_key *key)
-{
-	struct ext4_fname_crypto_ctx *ctx;
-
-	ctx = kmalloc(sizeof(struct ext4_fname_crypto_ctx), GFP_NOFS);
-	if (ctx == NULL)
-		return ERR_PTR(-ENOMEM);
-	if (key->mode == EXT4_ENCRYPTION_MODE_INVALID) {
-		/* This will automatically set key mode to invalid
-		 * As enum for ENCRYPTION_MODE_INVALID is zero */
-		memset(&ctx->key, 0, sizeof(ctx->key));
-	} else {
-		memcpy(&ctx->key, key, sizeof(struct ext4_encryption_key));
-	}
-	ctx->has_valid_key = (EXT4_ENCRYPTION_MODE_INVALID == key->mode)
-		? 0 : 1;
-	ctx->ctfm_key_is_ready = 0;
-	ctx->ctfm = NULL;
-	ctx->htfm = NULL;
-	ctx->workpage = NULL;
-	return ctx;
-}
-
-/**
- * ext4_get_fname_crypto_ctx() -
- *
- * Allocates a free crypto context and initializes it to hold
- * the crypto material for the inode.
- *
- * Return: NULL if not encrypted. Error value on error. Valid pointer otherwise.
- */
-struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(
-		struct inode *inode, u32 max_ciphertext_len)
-{
-	struct ext4_fname_crypto_ctx *ctx;
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	int res;
-
-	/* Check if the crypto policy is set on the inode */
-	res = ext4_encrypted_inode(inode);
-	if (res == 0)
-		return NULL;
-
-	if (!ext4_has_encryption_key(inode))
-		ext4_generate_encryption_key(inode);
-
-	/* Get a crypto context based on the key.
-	 * A new context is allocated if no context matches the requested key.
-	 */
-	ctx = ext4_search_fname_crypto_ctx(&(ei->i_encryption_key));
-	if (ctx == NULL)
-		ctx = ext4_alloc_fname_crypto_ctx(&(ei->i_encryption_key));
-	if (IS_ERR(ctx))
-		return ctx;
-
-	ctx->flags = ei->i_crypt_policy_flags;
-	if (ctx->has_valid_key) {
-		if (ctx->key.mode != EXT4_ENCRYPTION_MODE_AES_256_CTS) {
-			printk_once(KERN_WARNING
-				    "ext4: unsupported key mode %d\n",
-				    ctx->key.mode);
-			return ERR_PTR(-ENOKEY);
-		}
-
-		/* As a first cut, we will allocate new tfm in every call.
-		 * later, we will keep the tfm around, in case the key gets
-		 * re-used */
-		if (ctx->ctfm == NULL) {
-			ctx->ctfm = crypto_alloc_ablkcipher("cts(cbc(aes))",
-					0, 0);
-		}
-		if (IS_ERR(ctx->ctfm)) {
-			res = PTR_ERR(ctx->ctfm);
-			printk(
-			    KERN_DEBUG "%s: error (%d) allocating crypto tfm\n",
-			    __func__, res);
-			ctx->ctfm = NULL;
-			ext4_put_fname_crypto_ctx(&ctx);
-			return ERR_PTR(res);
-		}
-		if (ctx->ctfm == NULL) {
-			printk(
-			    KERN_DEBUG "%s: could not allocate crypto tfm\n",
-			    __func__);
-			ext4_put_fname_crypto_ctx(&ctx);
-			return ERR_PTR(-ENOMEM);
-		}
-		if (ctx->workpage == NULL)
-			ctx->workpage = alloc_page(GFP_NOFS);
-		if (IS_ERR(ctx->workpage)) {
-			res = PTR_ERR(ctx->workpage);
-			printk(
-			    KERN_DEBUG "%s: error (%d) allocating work page\n",
-			    __func__, res);
-			ctx->workpage = NULL;
-			ext4_put_fname_crypto_ctx(&ctx);
-			return ERR_PTR(res);
-		}
-		if (ctx->workpage == NULL) {
-			printk(
-			    KERN_DEBUG "%s: could not allocate work page\n",
-			    __func__);
-			ext4_put_fname_crypto_ctx(&ctx);
-			return ERR_PTR(-ENOMEM);
-		}
-		ctx->lim = max_ciphertext_len;
-		crypto_ablkcipher_clear_flags(ctx->ctfm, ~0);
-		crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctx->ctfm),
-			CRYPTO_TFM_REQ_WEAK_KEY);
-
-		/* If we are lucky, we will get a context that is already
-		 * set up with the right key. Else, we will have to
-		 * set the key */
-		if (!ctx->ctfm_key_is_ready) {
-			/* Since our crypto objectives for filename encryption
-			 * are pretty weak,
-			 * we directly use the inode master key */
-			res = crypto_ablkcipher_setkey(ctx->ctfm,
-					ctx->key.raw, ctx->key.size);
-			if (res) {
-				ext4_put_fname_crypto_ctx(&ctx);
-				return ERR_PTR(-EIO);
-			}
-			ctx->ctfm_key_is_ready = 1;
-		} else {
-			/* In the current implementation, key should never be
-			 * marked "ready" for a context that has just been
-			 * allocated. So we should never reach here */
-			BUG();
-		}
-	}
-	if (ctx->htfm == NULL)
-		ctx->htfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(ctx->htfm)) {
-		res = PTR_ERR(ctx->htfm);
-		printk(KERN_DEBUG "%s: error (%d) allocating hash tfm\n",
-		       __func__, res);
-		ctx->htfm = NULL;
-		ext4_put_fname_crypto_ctx(&ctx);
-		return ERR_PTR(res);
-	}
-	if (ctx->htfm == NULL) {
-		printk(KERN_DEBUG "%s: could not allocate hash tfm\n",
-		       __func__);
-		ext4_put_fname_crypto_ctx(&ctx);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	return ctx;
-}
-
 /**
  * ext4_fname_crypto_round_up() -
  *
@@ -464,44 +262,29 @@ u32 ext4_fname_crypto_round_up(u32 size, u32 blksize)
 	return ((size+blksize-1)/blksize)*blksize;
 }
 
 /**
  * ext4_fname_crypto_namelen_on_disk() -
  */
-int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx,
-				      u32 namelen)
+unsigned ext4_fname_encrypted_size(struct inode *inode, u32 ilen)
 {
-	u32 ciphertext_len;
-	int padding = 4 << (ctx->flags & EXT4_POLICY_FLAGS_PAD_MASK);
+	struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
+	int padding = 32;
 
-	if (ctx == NULL)
-		return -EIO;
-	if (!(ctx->has_valid_key))
-		return -EACCES;
-	ciphertext_len = (namelen < EXT4_CRYPTO_BLOCK_SIZE) ?
-		EXT4_CRYPTO_BLOCK_SIZE : namelen;
-	ciphertext_len = ext4_fname_crypto_round_up(ciphertext_len, padding);
-	ciphertext_len = (ciphertext_len > ctx->lim)
-			? ctx->lim : ciphertext_len;
-	return (int) ciphertext_len;
+	if (ci)
+		padding = 4 << (ci->ci_flags & EXT4_POLICY_FLAGS_PAD_MASK);
+	if (ilen < EXT4_CRYPTO_BLOCK_SIZE)
+		ilen = EXT4_CRYPTO_BLOCK_SIZE;
+	return ext4_fname_crypto_round_up(ilen, padding);
 }
 
-/**
- * ext4_fname_crypto_alloc_obuff() -
+/*
+ * ext4_fname_crypto_alloc_buffer() -
  *
  * Allocates an output buffer that is sufficient for the crypto operation
  * specified by the context and the direction.
  */
-int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx,
+int ext4_fname_crypto_alloc_buffer(struct inode *inode,
 				   u32 ilen, struct ext4_str *crypto_str)
 {
-	unsigned int olen;
-	int padding = 4 << (ctx->flags & EXT4_POLICY_FLAGS_PAD_MASK);
+	unsigned int olen = ext4_fname_encrypted_size(inode, ilen);
 
-	if (!ctx)
-		return -EIO;
-	if (padding < EXT4_CRYPTO_BLOCK_SIZE)
-		padding = EXT4_CRYPTO_BLOCK_SIZE;
-	olen = ext4_fname_crypto_round_up(ilen, padding);
 	crypto_str->len = olen;
 	if (olen < EXT4_FNAME_CRYPTO_DIGEST_SIZE*2)
 		olen = EXT4_FNAME_CRYPTO_DIGEST_SIZE*2;

@@ -529,7 +312,7 @@ void ext4_fname_crypto_free_buffer(struct ext4_str *crypto_str)
 /**
  * ext4_fname_disk_to_usr() - converts a filename from disk space to user space
  */
-int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
+int _ext4_fname_disk_to_usr(struct inode *inode,
 			    struct dx_hash_info *hinfo,
 			    const struct ext4_str *iname,
 			    struct ext4_str *oname)
@@ -537,8 +320,6 @@ int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
 	char buf[24];
 	int ret;
 
-	if (ctx == NULL)
-		return -EIO;
 	if (iname->len < 3) {
 		/*Check for . and .. */
 		if (iname->name[0] == '.' && iname->name[iname->len-1] == '.') {
@@ -548,8 +329,8 @@ int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
 			return oname->len;
 		}
 	}
-	if (ctx->has_valid_key)
-		return ext4_fname_decrypt(ctx, iname, oname);
+	if (EXT4_I(inode)->i_crypt_info)
+		return ext4_fname_decrypt(inode, iname, oname);
 
 	if (iname->len <= EXT4_FNAME_CRYPTO_DIGEST_SIZE) {
 		ret = digest_encode(iname->name, iname->len, oname->name);
@@ -568,7 +349,7 @@ int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
 	return ret + 1;
 }
 
-int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
+int ext4_fname_disk_to_usr(struct inode *inode,
 			   struct dx_hash_info *hinfo,
 			   const struct ext4_dir_entry_2 *de,
 			   struct ext4_str *oname)
@@ -576,21 +357,20 @@ int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
 	struct ext4_str iname = {.name = (unsigned char *) de->name,
 				 .len = de->name_len };
 
-	return _ext4_fname_disk_to_usr(ctx, hinfo, &iname, oname);
+	return _ext4_fname_disk_to_usr(inode, hinfo, &iname, oname);
 }
 
 
 /**
  * ext4_fname_usr_to_disk() - converts a filename from user space to disk space
  */
-int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx,
+int ext4_fname_usr_to_disk(struct inode *inode,
 			   const struct qstr *iname,
 			   struct ext4_str *oname)
 {
 	int res;
+	struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
 
-	if (ctx == NULL)
-		return -EIO;
 	if (iname->len < 3) {
 		/*Check for . and .. */
 		if (iname->name[0] == '.' &&
@@ -601,8 +381,8 @@ int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx,
 			return oname->len;
 		}
 	}
-	if (ctx->has_valid_key) {
-		res = ext4_fname_encrypt(ctx, iname, oname);
+	if (ci) {
+		res = ext4_fname_encrypt(inode, iname, oname);
 		return res;
 	}
 	/* Without a proper key, a user is not allowed to modify the filenames
@@ -611,109 +391,79 @@ int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx,
 	return -EACCES;
 }
 
 /*
  * Calculate the htree hash from a filename from user space
  */
-int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx,
-			   const struct qstr *iname,
-			   struct dx_hash_info *hinfo)
+int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname,
+			      int lookup, struct ext4_filename *fname)
 {
-	struct ext4_str tmp;
-	int ret = 0;
-	char buf[EXT4_FNAME_CRYPTO_DIGEST_SIZE+1];
+	struct ext4_crypt_info *ci;
+	int ret = 0, bigname = 0;
 
-	if (!ctx ||
+	memset(fname, 0, sizeof(struct ext4_filename));
+	fname->usr_fname = iname;
+
+	if (!ext4_encrypted_inode(dir) ||
 	    ((iname->name[0] == '.') &&
 	     ((iname->len == 1) ||
 	      ((iname->name[1] == '.') && (iname->len == 2))))) {
-		ext4fs_dirhash(iname->name, iname->len, hinfo);
+		fname->disk_name.name = (unsigned char *) iname->name;
+		fname->disk_name.len = iname->len;
 		return 0;
 	}
-
-	if (!ctx->has_valid_key && iname->name[0] == '_') {
-		if (iname->len != 33)
-			return -ENOENT;
-		ret = digest_decode(iname->name+1, iname->len, buf);
-		if (ret != 24)
-			return -ENOENT;
-		memcpy(&hinfo->hash, buf, 4);
-		memcpy(&hinfo->minor_hash, buf + 4, 4);
-		return 0;
-	}
-
-	if (!ctx->has_valid_key && iname->name[0] != '_') {
-		if (iname->len > 43)
-			return -ENOENT;
-		ret = digest_decode(iname->name, iname->len, buf);
-		ext4fs_dirhash(buf, ret, hinfo);
-		return 0;
-	}
-
-	/* First encrypt the plaintext name */
-	ret = ext4_fname_crypto_alloc_buffer(ctx, iname->len, &tmp);
-	if (ret < 0)
+	ret = ext4_get_encryption_info(dir);
+	if (ret)
 		return ret;
-
-	ret = ext4_fname_encrypt(ctx, iname, &tmp);
-	if (ret >= 0) {
-		ext4fs_dirhash(tmp.name, tmp.len, hinfo);
-		ret = 0;
-	}
-
-	ext4_fname_crypto_free_buffer(&tmp);
-	return ret;
-}
-
-int ext4_fname_match(struct ext4_fname_crypto_ctx *ctx, struct ext4_str *cstr,
-		     int len, const char * const name,
-		     struct ext4_dir_entry_2 *de)
-{
-	int ret = -ENOENT;
-	int bigname = (*name == '_');
-
-	if (ctx->has_valid_key) {
-		if (cstr->name == NULL) {
-			struct qstr istr;
-
-			ret = ext4_fname_crypto_alloc_buffer(ctx, len, cstr);
-			if (ret < 0)
-				goto errout;
-			istr.name = name;
-			istr.len = len;
-			ret = ext4_fname_encrypt(ctx, &istr, cstr);
-			if (ret < 0)
-				goto errout;
-		}
-	} else {
-		if (cstr->name == NULL) {
-			cstr->name = kmalloc(32, GFP_KERNEL);
-			if (cstr->name == NULL)
-				return -ENOMEM;
-			if ((bigname && (len != 33)) ||
-			    (!bigname && (len > 43)))
-				goto errout;
-			ret = digest_decode(name+bigname, len-bigname,
-					    cstr->name);
-			if (ret < 0) {
-				ret = -ENOENT;
-				goto errout;
-			}
-			cstr->len = ret;
-		}
-		if (bigname) {
-			if (de->name_len < 16)
-				return 0;
-			ret = memcmp(de->name + de->name_len - 16,
-				     cstr->name + 8, 16);
-			return (ret == 0) ? 1 : 0;
-		}
-	}
-	if (de->name_len != cstr->len)
-		return 0;
-	ret = memcmp(de->name, cstr->name, cstr->len);
-	return (ret == 0) ? 1 : 0;
+	ci = EXT4_I(dir)->i_crypt_info;
+	if (ci) {
+		ret = ext4_fname_crypto_alloc_buffer(dir, iname->len,
+						     &fname->crypto_buf);
+		if (ret < 0)
+			return ret;
+		ret = ext4_fname_encrypt(dir, iname, &fname->crypto_buf);
+		if (ret < 0)
+			goto errout;
+		fname->disk_name.name = fname->crypto_buf.name;
+		fname->disk_name.len = fname->crypto_buf.len;
+		return 0;
+	}
+	if (!lookup)
+		return -EACCES;
+
+	/* We don't have the key and we are doing a lookup; decode the
+	 * user-supplied name
+	 */
+	if (iname->name[0] == '_')
+		bigname = 1;
+	if ((bigname && (iname->len != 33)) ||
+	    (!bigname && (iname->len > 43)))
+		return -ENOENT;
+
+	fname->crypto_buf.name = kmalloc(32, GFP_KERNEL);
+	if (fname->crypto_buf.name == NULL)
+		return -ENOMEM;
+	ret = digest_decode(iname->name + bigname, iname->len - bigname,
+			    fname->crypto_buf.name);
+	if (ret < 0) {
+		ret = -ENOENT;
+		goto errout;
+	}
+	fname->crypto_buf.len = ret;
+	if (bigname) {
+		memcpy(&fname->hinfo.hash, fname->crypto_buf.name, 4);
+		memcpy(&fname->hinfo.minor_hash, fname->crypto_buf.name + 4, 4);
+	} else {
+		fname->disk_name.name = fname->crypto_buf.name;
+		fname->disk_name.len = fname->crypto_buf.len;
+	}
+	return 0;
 errout:
-	kfree(cstr->name);
-	cstr->name = NULL;
+	kfree(fname->crypto_buf.name);
+	fname->crypto_buf.name = NULL;
 	return ret;
 }
+
+void ext4_fname_free_filename(struct ext4_filename *fname)
+{
+	kfree(fname->crypto_buf.name);
+	fname->crypto_buf.name = NULL;
+	fname->usr_fname = NULL;
+	fname->disk_name.name = NULL;
+}
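With the per-operation struct ext4_filename in place, directory-entry code sets a name up once and then works only through fname_name()/fname_len() (declared in the ext4.h hunk further below). A hedged sketch of the calling pattern, inferred from this diff rather than copied from the tree; do_search_dir() is a made-up stand-in for whatever lookup helper a real caller uses:

	static struct buffer_head *lookup_entry(struct inode *dir,
						struct dentry *dentry,
						struct ext4_dir_entry_2 **de)
	{
		struct ext4_filename fname;
		struct buffer_head *bh;
		int err;

		/* lookup == 1: without the key, the name may still be
		 * matched against its base64-encoded digest form. */
		err = ext4_fname_setup_filename(dir, &dentry->d_name, 1, &fname);
		if (err)
			return ERR_PTR(err);
		bh = do_search_dir(dir, fname_name(&fname),
				   fname_len(&fname), de);	/* hypothetical */
		ext4_fname_free_filename(&fname);
		return bh;
	}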
fs/ext4/crypto_key.c
@@ -84,14 +84,38 @@ static int ext4_derive_key_aes(char deriving_key[EXT4_AES_128_ECB_KEY_SIZE],
 	return res;
 }
 
-/**
- * ext4_generate_encryption_key() - generates an encryption key
- * @inode: The inode to generate the encryption key for.
- */
-int ext4_generate_encryption_key(struct inode *inode)
+void ext4_free_crypt_info(struct ext4_crypt_info *ci)
+{
+	if (!ci)
+		return;
+
+	if (ci->ci_keyring_key)
+		key_put(ci->ci_keyring_key);
+	crypto_free_ablkcipher(ci->ci_ctfm);
+	kmem_cache_free(ext4_crypt_info_cachep, ci);
+}
+
+void ext4_free_encryption_info(struct inode *inode,
+			       struct ext4_crypt_info *ci)
 {
 	struct ext4_inode_info *ei = EXT4_I(inode);
-	struct ext4_encryption_key *crypt_key = &ei->i_encryption_key;
+	struct ext4_crypt_info *prev;
+
+	if (ci == NULL)
+		ci = ACCESS_ONCE(ei->i_crypt_info);
+	if (ci == NULL)
+		return;
+	prev = cmpxchg(&ei->i_crypt_info, ci, NULL);
+	if (prev != ci)
+		return;
+
+	ext4_free_crypt_info(ci);
+}
+
+int _ext4_get_encryption_info(struct inode *inode)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	struct ext4_crypt_info *crypt_info;
 	char full_key_descriptor[EXT4_KEY_DESC_PREFIX_SIZE +
 				 (EXT4_KEY_DESCRIPTOR_SIZE * 2) + 1];
 	struct key *keyring_key = NULL;
@@ -99,32 +123,77 @@ int ext4_generate_encryption_key(struct inode *inode)
 	struct ext4_encryption_context ctx;
 	struct user_key_payload *ukp;
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-	int res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
+	struct crypto_ablkcipher *ctfm;
+	const char *cipher_str;
+	char raw_key[EXT4_MAX_KEY_SIZE];
+	char mode;
+	int res;
+
+	if (!ext4_read_workqueue) {
+		res = ext4_init_crypto();
+		if (res)
+			return res;
+	}
+
+retry:
+	crypt_info = ACCESS_ONCE(ei->i_crypt_info);
+	if (crypt_info) {
+		if (!crypt_info->ci_keyring_key ||
+		    key_validate(crypt_info->ci_keyring_key) == 0)
+			return 0;
+		ext4_free_encryption_info(inode, crypt_info);
+		goto retry;
+	}
+
+	res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
 				 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
 				 &ctx, sizeof(ctx));
-
-	if (res != sizeof(ctx)) {
-		if (res > 0)
-			res = -EINVAL;
-		goto out;
-	}
+	if (res < 0) {
+		if (!DUMMY_ENCRYPTION_ENABLED(sbi))
+			return res;
+		ctx.contents_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS;
+		ctx.filenames_encryption_mode =
+			EXT4_ENCRYPTION_MODE_AES_256_CTS;
+		ctx.flags = 0;
+	} else if (res != sizeof(ctx))
+		return -EINVAL;
 	res = 0;
 
-	ei->i_crypt_policy_flags = ctx.flags;
+	crypt_info = kmem_cache_alloc(ext4_crypt_info_cachep, GFP_KERNEL);
+	if (!crypt_info)
+		return -ENOMEM;
+
+	crypt_info->ci_flags = ctx.flags;
+	crypt_info->ci_data_mode = ctx.contents_encryption_mode;
+	crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
+	crypt_info->ci_ctfm = NULL;
+	crypt_info->ci_keyring_key = NULL;
+	memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
+	       sizeof(crypt_info->ci_master_key));
 	if (S_ISREG(inode->i_mode))
-		crypt_key->mode = ctx.contents_encryption_mode;
+		mode = crypt_info->ci_data_mode;
 	else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-		crypt_key->mode = ctx.filenames_encryption_mode;
-	else {
-		printk(KERN_ERR "ext4 crypto: Unsupported inode type.\n");
+		mode = crypt_info->ci_filename_mode;
+	else
 		BUG();
-	}
-	crypt_key->size = ext4_encryption_key_size(crypt_key->mode);
-	BUG_ON(!crypt_key->size);
-	if (DUMMY_ENCRYPTION_ENABLED(sbi)) {
-		memset(crypt_key->raw, 0x42, EXT4_AES_256_XTS_KEY_SIZE);
+	switch (mode) {
+	case EXT4_ENCRYPTION_MODE_AES_256_XTS:
+		cipher_str = "xts(aes)";
+		break;
+	case EXT4_ENCRYPTION_MODE_AES_256_CTS:
+		cipher_str = "cts(cbc(aes))";
+		break;
+	default:
+		printk_once(KERN_WARNING
+			    "ext4: unsupported key mode %d (ino %u)\n",
+			    mode, (unsigned) inode->i_ino);
+		res = -ENOKEY;
+		goto out;
+	}
+	if (DUMMY_ENCRYPTION_ENABLED(sbi)) {
+		memset(raw_key, 0x42, EXT4_AES_256_XTS_KEY_SIZE);
+		goto got_key;
 	}
 	memcpy(full_key_descriptor, EXT4_KEY_DESC_PREFIX,
 	       EXT4_KEY_DESC_PREFIX_SIZE);
 	sprintf(full_key_descriptor + EXT4_KEY_DESC_PREFIX_SIZE,
@@ -138,6 +207,7 @@ int ext4_generate_encryption_key(struct inode *inode)
 		keyring_key = NULL;
 		goto out;
 	}
+	crypt_info->ci_keyring_key = keyring_key;
 	BUG_ON(keyring_key->type != &key_type_logon);
 	ukp = ((struct user_key_payload *)keyring_key->payload.data);
 	if (ukp->datalen != sizeof(struct ext4_encryption_key)) {
@@ -148,19 +218,43 @@ int ext4_generate_encryption_key(struct inode *inode)
 	BUILD_BUG_ON(EXT4_AES_128_ECB_KEY_SIZE !=
 		     EXT4_KEY_DERIVATION_NONCE_SIZE);
 	BUG_ON(master_key->size != EXT4_AES_256_XTS_KEY_SIZE);
-	res = ext4_derive_key_aes(ctx.nonce, master_key->raw, crypt_key->raw);
+	res = ext4_derive_key_aes(ctx.nonce, master_key->raw,
+				  raw_key);
+got_key:
+	ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0);
+	if (!ctfm || IS_ERR(ctfm)) {
+		res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
+		printk(KERN_DEBUG
+		       "%s: error %d (inode %u) allocating crypto tfm\n",
+		       __func__, res, (unsigned) inode->i_ino);
+		goto out;
+	}
+	crypt_info->ci_ctfm = ctfm;
+	crypto_ablkcipher_clear_flags(ctfm, ~0);
+	crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm),
+			     CRYPTO_TFM_REQ_WEAK_KEY);
+	res = crypto_ablkcipher_setkey(ctfm, raw_key,
+				       ext4_encryption_key_size(mode));
+	if (res)
+		goto out;
+	memzero_explicit(raw_key, sizeof(raw_key));
+	if (cmpxchg(&ei->i_crypt_info, NULL, crypt_info) != NULL) {
+		ext4_free_crypt_info(crypt_info);
+		goto retry;
+	}
+	return 0;
+
 out:
 	if (keyring_key)
 		key_put(keyring_key);
-	if (res < 0)
-		crypt_key->mode = EXT4_ENCRYPTION_MODE_INVALID;
+	if (res == -ENOKEY)
+		res = 0;
+	ext4_free_crypt_info(crypt_info);
+	memzero_explicit(raw_key, sizeof(raw_key));
 	return res;
 }
 
 int ext4_has_encryption_key(struct inode *inode)
 {
 	struct ext4_inode_info *ei = EXT4_I(inode);
-	struct ext4_encryption_key *crypt_key = &ei->i_encryption_key;
 
-	return (crypt_key->mode != EXT4_ENCRYPTION_MODE_INVALID);
+	return (ei->i_crypt_info != NULL);
 }
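Note how _ext4_get_encryption_info() above publishes the fully initialized crypt_info with cmpxchg() and how ext4_free_encryption_info() detaches it the same way, so concurrent readers of i_crypt_info observe either NULL or a complete structure without taking a lock. A condensed, generic sketch of that publish/teardown idiom (illustration only; my_info, my_cachep, setup() and i_info are made-up names, not ext4 symbols):

	static int publish_info(struct my_inode_info *ei)
	{
		struct my_info *info = kmem_cache_alloc(my_cachep, GFP_KERNEL);

		if (!info)
			return -ENOMEM;
		setup(info);			/* fully initialize first */
		if (cmpxchg(&ei->i_info, NULL, info) != NULL)
			/* Another task published first; discard our copy. */
			kmem_cache_free(my_cachep, info);
		return 0;
	}

The loser of the race frees its private copy and retries (the diff's "goto retry"), which is cheaper than serializing every open on a mutex.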
fs/ext4/crypto_policy.c
@@ -51,6 +51,10 @@ static int ext4_create_encryption_context_from_policy(
 	struct ext4_encryption_context ctx;
 	int res = 0;
 
+	res = ext4_convert_inline_data(inode);
+	if (res)
+		return res;
+
 	ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1;
 	memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
 	       EXT4_KEY_DESCRIPTOR_SIZE);
@@ -89,6 +93,8 @@ int ext4_process_policy(const struct ext4_encryption_policy *policy,
 		return -EINVAL;
 
 	if (!ext4_inode_has_encryption_context(inode)) {
+		if (!S_ISDIR(inode->i_mode))
+			return -EINVAL;
 		if (!ext4_empty_dir(inode))
 			return -ENOTEMPTY;
 		return ext4_create_encryption_context_from_policy(inode,
@@ -126,7 +132,7 @@ int ext4_get_policy(struct inode *inode, struct ext4_encryption_policy *policy)
 int ext4_is_child_context_consistent_with_parent(struct inode *parent,
 						 struct inode *child)
 {
-	struct ext4_encryption_context parent_ctx, child_ctx;
+	struct ext4_crypt_info *parent_ci, *child_ci;
 	int res;
 
 	if ((parent == NULL) || (child == NULL)) {
@@ -136,26 +142,28 @@ int ext4_is_child_context_consistent_with_parent(struct inode *parent,
 	/* no restrictions if the parent directory is not encrypted */
 	if (!ext4_encrypted_inode(parent))
 		return 1;
-	res = ext4_xattr_get(parent, EXT4_XATTR_INDEX_ENCRYPTION,
-			     EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
-			     &parent_ctx, sizeof(parent_ctx));
-	if (res != sizeof(parent_ctx))
-		return 0;
 	/* if the child directory is not encrypted, this is always a problem */
 	if (!ext4_encrypted_inode(child))
 		return 0;
-	res = ext4_xattr_get(child, EXT4_XATTR_INDEX_ENCRYPTION,
-			     EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
-			     &child_ctx, sizeof(child_ctx));
-	if (res != sizeof(child_ctx))
+	res = ext4_get_encryption_info(parent);
+	if (res)
 		return 0;
-	return (memcmp(parent_ctx.master_key_descriptor,
-		       child_ctx.master_key_descriptor,
+	res = ext4_get_encryption_info(child);
+	if (res)
+		return 0;
+	parent_ci = EXT4_I(parent)->i_crypt_info;
+	child_ci = EXT4_I(child)->i_crypt_info;
+	if (!parent_ci && !child_ci)
+		return 1;
+	if (!parent_ci || !child_ci)
+		return 0;
+
+	return (memcmp(parent_ci->ci_master_key,
+		       child_ci->ci_master_key,
 		       EXT4_KEY_DESCRIPTOR_SIZE) == 0 &&
-		(parent_ctx.contents_encryption_mode ==
-		 child_ctx.contents_encryption_mode) &&
-		(parent_ctx.filenames_encryption_mode ==
-		 child_ctx.filenames_encryption_mode));
+		(parent_ci->ci_data_mode == child_ci->ci_data_mode) &&
+		(parent_ci->ci_filename_mode == child_ci->ci_filename_mode) &&
+		(parent_ci->ci_flags == child_ci->ci_flags));
 }
 
 /**
@@ -168,31 +176,40 @@ int ext4_is_child_context_consistent_with_parent(struct inode *parent,
 int ext4_inherit_context(struct inode *parent, struct inode *child)
 {
 	struct ext4_encryption_context ctx;
-	int res = ext4_xattr_get(parent, EXT4_XATTR_INDEX_ENCRYPTION,
-				 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
-				 &ctx, sizeof(ctx));
+	struct ext4_crypt_info *ci;
+	int res;
 
-	if (res != sizeof(ctx)) {
-		if (DUMMY_ENCRYPTION_ENABLED(EXT4_SB(parent->i_sb))) {
-			ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1;
-			ctx.contents_encryption_mode =
-				EXT4_ENCRYPTION_MODE_AES_256_XTS;
-			ctx.filenames_encryption_mode =
-				EXT4_ENCRYPTION_MODE_AES_256_CTS;
-			ctx.flags = 0;
-			memset(ctx.master_key_descriptor, 0x42,
-			       EXT4_KEY_DESCRIPTOR_SIZE);
-			res = 0;
-		} else {
-			goto out;
-		}
+	res = ext4_get_encryption_info(parent);
+	if (res < 0)
+		return res;
+	ci = EXT4_I(parent)->i_crypt_info;
+	if (ci == NULL)
+		return -ENOKEY;
+
+	ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1;
+	if (DUMMY_ENCRYPTION_ENABLED(EXT4_SB(parent->i_sb))) {
+		ctx.contents_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS;
+		ctx.filenames_encryption_mode =
+			EXT4_ENCRYPTION_MODE_AES_256_CTS;
+		ctx.flags = 0;
+		memset(ctx.master_key_descriptor, 0x42,
+		       EXT4_KEY_DESCRIPTOR_SIZE);
+		res = 0;
+	} else {
+		ctx.contents_encryption_mode = ci->ci_data_mode;
+		ctx.filenames_encryption_mode = ci->ci_filename_mode;
+		ctx.flags = ci->ci_flags;
+		memcpy(ctx.master_key_descriptor, ci->ci_master_key,
+		       EXT4_KEY_DESCRIPTOR_SIZE);
 	}
 	get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE);
 	res = ext4_xattr_set(child, EXT4_XATTR_INDEX_ENCRYPTION,
 			     EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
 			     sizeof(ctx), 0);
-out:
-	if (!res)
+	if (!res) {
 		ext4_set_inode_flag(child, EXT4_INODE_ENCRYPT);
+		ext4_clear_inode_state(child, EXT4_STATE_MAY_INLINE_DATA);
+		res = ext4_get_encryption_info(child);
+	}
 	return res;
 }
fs/ext4/dir.c
@@ -110,7 +110,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
 	struct super_block *sb = inode->i_sb;
 	struct buffer_head *bh = NULL;
 	int dir_has_error = 0;
-	struct ext4_fname_crypto_ctx *enc_ctx = NULL;
 	struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
 
 	if (is_dx_dir(inode)) {
@@ -134,16 +133,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
 			return err;
 	}
 
-	enc_ctx = ext4_get_fname_crypto_ctx(inode, EXT4_NAME_LEN);
-	if (IS_ERR(enc_ctx))
-		return PTR_ERR(enc_ctx);
-	if (enc_ctx) {
-		err = ext4_fname_crypto_alloc_buffer(enc_ctx, EXT4_NAME_LEN,
+	if (ext4_encrypted_inode(inode)) {
+		err = ext4_fname_crypto_alloc_buffer(inode, EXT4_NAME_LEN,
 						     &fname_crypto_str);
-		if (err < 0) {
-			ext4_put_fname_crypto_ctx(&enc_ctx);
+		if (err < 0)
 			return err;
-		}
 	}
 
 	offset = ctx->pos & (sb->s_blocksize - 1);
@@ -239,17 +233,19 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
 			offset += ext4_rec_len_from_disk(de->rec_len,
 					sb->s_blocksize);
 			if (le32_to_cpu(de->inode)) {
-				if (enc_ctx == NULL) {
-					/* Directory is not encrypted */
+				if (!ext4_encrypted_inode(inode)) {
 					if (!dir_emit(ctx, de->name,
 					    de->name_len,
 					    le32_to_cpu(de->inode),
 					    get_dtype(sb, de->file_type)))
 						goto done;
 				} else {
+					int save_len = fname_crypto_str.len;
+
 					/* Directory is encrypted */
-					err = ext4_fname_disk_to_usr(enc_ctx,
+					err = ext4_fname_disk_to_usr(inode,
 						NULL, de, &fname_crypto_str);
+					fname_crypto_str.len = save_len;
 					if (err < 0)
 						goto errout;
 					if (!dir_emit(ctx,
@@ -272,7 +268,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
 	err = 0;
 errout:
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
-	ext4_put_fname_crypto_ctx(&enc_ctx);
 	ext4_fname_crypto_free_buffer(&fname_crypto_str);
 #endif
 	brelse(bh);
@@ -598,6 +593,13 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
 	return 0;
 }
 
+static int ext4_dir_open(struct inode * inode, struct file * filp)
+{
+	if (ext4_encrypted_inode(inode))
+		return ext4_get_encryption_info(inode) ? -EACCES : 0;
+	return 0;
+}
+
 static int ext4_release_dir(struct inode *inode, struct file *filp)
 {
 	if (filp->private_data)
@@ -640,5 +642,6 @@ const struct file_operations ext4_dir_operations = {
 	.compat_ioctl	= ext4_compat_ioctl,
 #endif
 	.fsync		= ext4_sync_file,
+	.open		= ext4_dir_open,
 	.release	= ext4_release_dir,
 };
fs/ext4/ext4.h (159 lines changed)
@@ -69,15 +69,6 @@
 #define ext_debug(fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
 #endif
 
-#define EXT4_ERROR_INODE(inode, fmt, a...) \
-	ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a)
-
-#define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...)			\
-	ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a)
-
-#define EXT4_ERROR_FILE(file, block, fmt, a...)				\
-	ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a)
-
 /* data type for block offset of block group */
 typedef int ext4_grpblk_t;
 
@@ -90,6 +81,11 @@ typedef __u32 ext4_lblk_t;
 /* data type for block group number */
 typedef unsigned int ext4_group_t;
 
+enum SHIFT_DIRECTION {
+	SHIFT_LEFT = 0,
+	SHIFT_RIGHT,
+};
+
 /*
  * Flags used in mballoc's allocation_context flags field.
  *
@@ -911,7 +907,6 @@ struct ext4_inode_info {
 
 	/* on-disk additional length */
 	__u16 i_extra_isize;
-	char i_crypt_policy_flags;
 
 	/* Indicate the inline data space. */
 	u16 i_inline_off;
@@ -955,7 +950,7 @@ struct ext4_inode_info {
 
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
 	/* Encryption params */
-	struct ext4_encryption_key i_encryption_key;
+	struct ext4_crypt_info *i_crypt_info;
 #endif
 };
 
@@ -1374,12 +1369,6 @@ struct ext4_sb_info {
 	struct ratelimit_state s_err_ratelimit_state;
 	struct ratelimit_state s_warning_ratelimit_state;
 	struct ratelimit_state s_msg_ratelimit_state;
-
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
-	/* Encryption */
-	uint32_t s_file_encryption_mode;
-	uint32_t s_dir_encryption_mode;
-#endif
 };
 
 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1838,6 +1827,17 @@ struct dx_hash_info
  */
 #define HASH_NB_ALWAYS		1
 
+struct ext4_filename {
+	const struct qstr *usr_fname;
+	struct ext4_str disk_name;
+	struct dx_hash_info hinfo;
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+	struct ext4_str crypto_buf;
+#endif
+};
+
+#define fname_name(p) ((p)->disk_name.name)
+#define fname_len(p)  ((p)->disk_name.len)
+
 /*
  * Describe an inode's exact location on disk and in memory
@@ -2054,6 +2054,7 @@ int ext4_get_policy(struct inode *inode,
 		    struct ext4_encryption_policy *policy);
 
 /* crypto.c */
+extern struct kmem_cache *ext4_crypt_info_cachep;
 bool ext4_valid_contents_enc_mode(uint32_t mode);
 uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size);
 extern struct workqueue_struct *ext4_read_workqueue;
@ -2085,57 +2086,84 @@ static inline int ext4_sb_has_crypto(struct super_block *sb)
|
|||
/* crypto_fname.c */
|
||||
bool ext4_valid_filenames_enc_mode(uint32_t mode);
|
||||
u32 ext4_fname_crypto_round_up(u32 size, u32 blksize);
|
||||
int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx,
|
||||
unsigned ext4_fname_encrypted_size(struct inode *inode, u32 ilen);
|
||||
int ext4_fname_crypto_alloc_buffer(struct inode *inode,
|
||||
u32 ilen, struct ext4_str *crypto_str);
|
||||
int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
|
||||
int _ext4_fname_disk_to_usr(struct inode *inode,
|
||||
struct dx_hash_info *hinfo,
|
||||
const struct ext4_str *iname,
|
||||
struct ext4_str *oname);
|
||||
int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
|
||||
int ext4_fname_disk_to_usr(struct inode *inode,
|
||||
struct dx_hash_info *hinfo,
|
||||
const struct ext4_dir_entry_2 *de,
|
||||
struct ext4_str *oname);
|
||||
int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx,
|
||||
int ext4_fname_usr_to_disk(struct inode *inode,
|
||||
const struct qstr *iname,
|
||||
struct ext4_str *oname);
|
||||
int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx,
|
||||
const struct qstr *iname,
|
||||
struct dx_hash_info *hinfo);
|
||||
int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx,
|
||||
u32 namelen);
|
||||
int ext4_fname_match(struct ext4_fname_crypto_ctx *ctx, struct ext4_str *cstr,
|
||||
int len, const char * const name,
|
||||
struct ext4_dir_entry_2 *de);
|
||||
|
||||
|
||||
#ifdef CONFIG_EXT4_FS_ENCRYPTION
|
||||
void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx);
|
||||
struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(struct inode *inode,
|
||||
u32 max_len);
|
||||
void ext4_fname_crypto_free_buffer(struct ext4_str *crypto_str);
|
||||
int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname,
|
||||
int lookup, struct ext4_filename *fname);
|
||||
void ext4_fname_free_filename(struct ext4_filename *fname);
|
||||
#else
|
||||
static inline
|
||||
void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx) { }
|
||||
static inline
|
||||
struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(struct inode *inode,
|
||||
u32 max_len)
|
||||
int ext4_setup_fname_crypto(struct inode *inode)
|
||||
{
|
||||
return NULL;
|
||||
return 0;
|
||||
}
|
||||
static inline void ext4_fname_crypto_free_buffer(struct ext4_str *p) { }
|
||||
static inline int ext4_fname_setup_filename(struct inode *dir,
|
||||
const struct qstr *iname,
|
||||
int lookup, struct ext4_filename *fname)
|
||||
{
|
||||
fname->usr_fname = iname;
|
||||
fname->disk_name.name = (unsigned char *) iname->name;
|
||||
fname->disk_name.len = iname->len;
|
||||
return 0;
|
||||
}
|
||||
static inline void ext4_fname_free_filename(struct ext4_filename *fname) { }
|
||||
#endif
|
||||
|
||||
|
||||
/* crypto_key.c */
|
||||
int ext4_generate_encryption_key(struct inode *inode);
|
||||
void ext4_free_crypt_info(struct ext4_crypt_info *ci);
|
||||
void ext4_free_encryption_info(struct inode *inode, struct ext4_crypt_info *ci);
|
||||
int _ext4_get_encryption_info(struct inode *inode);
|
||||
|
||||
#ifdef CONFIG_EXT4_FS_ENCRYPTION
|
||||
int ext4_has_encryption_key(struct inode *inode);
|
||||
|
||||
static inline int ext4_get_encryption_info(struct inode *inode)
|
||||
{
|
||||
struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
|
||||
|
||||
if (!ci ||
|
||||
(ci->ci_keyring_key &&
|
||||
(ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
|
||||
(1 << KEY_FLAG_REVOKED) |
|
||||
(1 << KEY_FLAG_DEAD)))))
|
||||
return _ext4_get_encryption_info(inode);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode)
|
||||
{
|
||||
return EXT4_I(inode)->i_crypt_info;
|
||||
}
|
||||
|
||||
#else
|
||||
static inline int ext4_has_encryption_key(struct inode *inode)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline int ext4_get_encryption_info(struct inode *inode)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -2156,14 +2184,13 @@ extern void ext4_htree_free_dir_info(struct dir_private_info *p);
|
|||
extern int ext4_find_dest_de(struct inode *dir, struct inode *inode,
|
||||
struct buffer_head *bh,
|
||||
void *buf, int buf_size,
|
||||
const char *name, int namelen,
|
||||
struct ext4_filename *fname,
|
||||
struct ext4_dir_entry_2 **dest_de);
|
||||
int ext4_insert_dentry(struct inode *dir,
|
||||
struct inode *inode,
|
||||
struct ext4_dir_entry_2 *de,
|
||||
int buf_size,
|
||||
const struct qstr *iname,
|
||||
const char *name, int namelen);
|
||||
struct inode *inode,
|
||||
struct ext4_dir_entry_2 *de,
|
||||
int buf_size,
|
||||
struct ext4_filename *fname);
|
||||
static inline void ext4_update_dx_flag(struct inode *inode)
|
||||
{
|
||||
if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
|
||||
|
@ -2317,13 +2344,14 @@ extern int ext4_orphan_add(handle_t *, struct inode *);
|
|||
extern int ext4_orphan_del(handle_t *, struct inode *);
|
||||
extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
|
||||
__u32 start_minor_hash, __u32 *next_hash);
|
||||
extern int search_dir(struct buffer_head *bh,
|
||||
char *search_buf,
|
||||
int buf_size,
|
||||
struct inode *dir,
|
||||
const struct qstr *d_name,
|
||||
unsigned int offset,
|
||||
struct ext4_dir_entry_2 **res_dir);
|
||||
extern int ext4_search_dir(struct buffer_head *bh,
|
||||
char *search_buf,
|
||||
int buf_size,
|
||||
struct inode *dir,
|
||||
struct ext4_filename *fname,
|
||||
const struct qstr *d_name,
|
||||
unsigned int offset,
|
||||
struct ext4_dir_entry_2 **res_dir);
|
||||
extern int ext4_generic_delete_entry(handle_t *handle,
|
||||
struct inode *dir,
|
||||
struct ext4_dir_entry_2 *de_del,
|
||||
|
@ -2368,6 +2396,9 @@ void __ext4_abort(struct super_block *, const char *, unsigned int,
|
|||
extern __printf(4, 5)
|
||||
void __ext4_warning(struct super_block *, const char *, unsigned int,
|
||||
const char *, ...);
|
||||
extern __printf(4, 5)
|
||||
void __ext4_warning_inode(const struct inode *inode, const char *function,
|
||||
unsigned int line, const char *fmt, ...);
|
||||
extern __printf(3, 4)
|
||||
void __ext4_msg(struct super_block *, const char *, const char *, ...);
|
||||
extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp,
|
||||
|
@ -2378,6 +2409,15 @@ void __ext4_grp_locked_error(const char *, unsigned int,
|
|||
unsigned long, ext4_fsblk_t,
|
||||
const char *, ...);
|
||||
|
||||
#define EXT4_ERROR_INODE(inode, fmt, a...) \
|
||||
ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a)
|
||||
|
||||
#define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...) \
|
||||
ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a)
|
||||
|
||||
#define EXT4_ERROR_FILE(file, block, fmt, a...) \
|
||||
ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a)
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
|
||||
#define ext4_error_inode(inode, func, line, block, fmt, ...) \
|
||||
|
@ -2390,6 +2430,8 @@ void __ext4_grp_locked_error(const char *, unsigned int,
|
|||
__ext4_abort(sb, __func__, __LINE__, fmt, ##__VA_ARGS__)
|
||||
#define ext4_warning(sb, fmt, ...) \
|
||||
__ext4_warning(sb, __func__, __LINE__, fmt, ##__VA_ARGS__)
|
||||
#define ext4_warning_inode(inode, fmt, ...) \
|
||||
__ext4_warning_inode(inode, __func__, __LINE__, fmt, ##__VA_ARGS__)
|
||||
#define ext4_msg(sb, level, fmt, ...) \
|
||||
__ext4_msg(sb, level, fmt, ##__VA_ARGS__)
|
||||
#define dump_mmp_msg(sb, mmp, msg) \
|
||||
|
@ -2425,6 +2467,11 @@ do { \
|
|||
no_printk(fmt, ##__VA_ARGS__); \
|
||||
__ext4_warning(sb, "", 0, " "); \
|
||||
} while (0)
|
||||
#define ext4_warning_inode(inode, fmt, ...) \
|
||||
do { \
|
||||
no_printk(fmt, ##__VA_ARGS__); \
|
||||
__ext4_warning_inode(inode, "", 0, " "); \
|
||||
} while (0)
|
||||
#define ext4_msg(sb, level, fmt, ...) \
|
||||
do { \
|
||||
no_printk(fmt, ##__VA_ARGS__); \
|
||||
|
@ -2768,7 +2815,9 @@ extern int ext4_da_write_inline_data_begin(struct address_space *mapping,
|
|||
extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
|
||||
unsigned len, unsigned copied,
|
||||
struct page *page);
|
||||
extern int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
|
||||
extern int ext4_try_add_inline_entry(handle_t *handle,
|
||||
struct ext4_filename *fname,
|
||||
struct dentry *dentry,
|
||||
struct inode *inode);
|
||||
extern int ext4_try_create_inline_dir(handle_t *handle,
|
||||
struct inode *parent,
|
||||
|
@ -2782,6 +2831,7 @@ extern int htree_inlinedir_to_tree(struct file *dir_file,
|
|||
__u32 start_hash, __u32 start_minor_hash,
|
||||
int *has_inline_data);
|
||||
extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
|
||||
struct ext4_filename *fname,
|
||||
const struct qstr *d_name,
|
||||
struct ext4_dir_entry_2 **res_dir,
|
||||
int *has_inline_data);
|
||||
|
@ -2913,6 +2963,7 @@ extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
|||
__u64 start, __u64 len);
|
||||
extern int ext4_ext_precache(struct inode *inode);
|
||||
extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
|
||||
extern int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len);
|
||||
extern int ext4_swap_extents(handle_t *handle, struct inode *inode1,
|
||||
struct inode *inode2, ext4_lblk_t lblk1,
|
||||
ext4_lblk_t lblk2, ext4_lblk_t count,
|
||||
|
|
|
fs/ext4/ext4_crypto.h
@@ -66,24 +66,39 @@ struct ext4_encryption_context {
#define EXT4_KEY_DESC_PREFIX "ext4:"
#define EXT4_KEY_DESC_PREFIX_SIZE 5

/* This is passed in from userspace into the kernel keyring */
struct ext4_encryption_key {
	uint32_t mode;
	char raw[EXT4_MAX_KEY_SIZE];
	uint32_t size;
	__u32 mode;
	char raw[EXT4_MAX_KEY_SIZE];
	__u32 size;
} __attribute__((__packed__));

struct ext4_crypt_info {
	char		ci_data_mode;
	char		ci_filename_mode;
	char		ci_flags;
	struct crypto_ablkcipher *ci_ctfm;
	struct key	*ci_keyring_key;
	char		ci_master_key[EXT4_KEY_DESCRIPTOR_SIZE];
};

#define EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL		0x00000001
#define EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL	0x00000002
#define EXT4_WRITE_PATH_FL				0x00000002

struct ext4_crypto_ctx {
	struct crypto_tfm *tfm;		/* Crypto API context */
	struct page *bounce_page;	/* Ciphertext page on write path */
	struct page *control_page;	/* Original page on write path */
	struct bio *bio;		/* The bio for this context */
	struct work_struct work;	/* Work queue for read complete path */
	struct list_head free_list;	/* Free list */
	int flags;			/* Flags */
	int mode;			/* Encryption mode for tfm */
	union {
		struct {
			struct page *bounce_page;	/* Ciphertext page */
			struct page *control_page;	/* Original page */
		} w;
		struct {
			struct bio *bio;
			struct work_struct work;
		} r;
		struct list_head free_list;	/* Free list */
	};
	char flags;			/* Flags */
	char mode;			/* Encryption mode for tfm */
};

struct ext4_completion_result {
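For context, a userspace sketch of pushing a key matching this packed layout into the session keyring with add_key(2). The descriptor string, mode constant, and key bytes here are illustrative assumptions, not part of the patch; only the "ext4:" prefix and the struct layout come from the header above:

#include <keyutils.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define EXT4_MAX_KEY_SIZE 64	/* assumed to match the kernel header */

struct ext4_encryption_key {
	uint32_t mode;
	char raw[EXT4_MAX_KEY_SIZE];
	uint32_t size;
} __attribute__((__packed__));

int load_ext4_key(const unsigned char *raw, uint32_t len, const char *descriptor)
{
	struct ext4_encryption_key key;
	char desc[64];

	memset(&key, 0, sizeof(key));
	key.mode = 1;	/* e.g. AES-256-XTS; the exact constant is an assumption */
	key.size = len;
	memcpy(key.raw, raw, len);

	/* "ext4:" matches EXT4_KEY_DESC_PREFIX above; the "logon" key type
	 * keeps the payload unreadable from userspace. Link with -lkeyutils. */
	snprintf(desc, sizeof(desc), "ext4:%s", descriptor);
	return add_key("logon", desc, &key, sizeof(key),
		       KEY_SPEC_SESSION_KEYRING) < 0 ? -1 : 0;
}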
@@ -121,18 +136,6 @@ struct ext4_str {
	u32 len;
};

struct ext4_fname_crypto_ctx {
	u32 lim;
	char tmp_buf[EXT4_CRYPTO_BLOCK_SIZE];
	struct crypto_ablkcipher *ctfm;
	struct crypto_hash *htfm;
	struct page *workpage;
	struct ext4_encryption_key key;
	unsigned flags : 8;
	unsigned has_valid_key : 1;
	unsigned ctfm_key_is_ready : 1;
};

/**
 * For encrypted symlinks, the ciphertext length is stored at the beginning
 * of the string in little-endian format.
fs/ext4/extents.c
@@ -4456,6 +4456,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
		ar.flags |= EXT4_MB_HINT_NOPREALLOC;
	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
		ar.flags |= EXT4_MB_DELALLOC_RESERVED;
	if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
		ar.flags |= EXT4_MB_USE_RESERVED;
	newblock = ext4_mb_new_blocks(handle, &ar, &err);
	if (!newblock)
		goto out2;
@@ -4663,6 +4665,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
	int ret = 0;
	int ret2 = 0;
	int retries = 0;
	int depth = 0;
	struct ext4_map_blocks map;
	unsigned int credits;
	loff_t epos;
@@ -4677,13 +4680,32 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
	if (len <= EXT_UNWRITTEN_MAX_LEN)
		flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;

	/* Wait all existing dio workers, newcomers will block on i_mutex */
	ext4_inode_block_unlocked_dio(inode);
	inode_dio_wait(inode);

	/*
	 * credits to insert 1 extent into extent tree
	 */
	credits = ext4_chunk_trans_blocks(inode, len);
	/*
	 * We can only call ext_depth() on extent based inodes
	 */
	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
		depth = ext_depth(inode);
	else
		depth = -1;

retry:
	while (ret >= 0 && len) {
		/*
		 * Recalculate credits when extent tree depth changes.
		 */
		if (depth >= 0 && depth != ext_depth(inode)) {
			credits = ext4_chunk_trans_blocks(inode, len);
			depth = ext_depth(inode);
		}

		handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
					    credits);
		if (IS_ERR(handle)) {
@@ -4725,6 +4747,8 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
		goto retry;
	}

	ext4_inode_resume_unlocked_dio(inode);

	return ret > 0 ? ret2 : ret;
}
@@ -4912,12 +4936,14 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
	 * bug we should fix....
	 */
	if (ext4_encrypted_inode(inode) &&
	    (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)))
	    (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE |
		     FALLOC_FL_ZERO_RANGE)))
		return -EOPNOTSUPP;

	/* Return error if mode is not supported */
	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
		     FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
		     FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
		     FALLOC_FL_INSERT_RANGE))
		return -EOPNOTSUPP;

	if (mode & FALLOC_FL_PUNCH_HOLE)
@@ -4930,6 +4956,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
	if (mode & FALLOC_FL_COLLAPSE_RANGE)
		return ext4_collapse_range(inode, offset, len);

	if (mode & FALLOC_FL_INSERT_RANGE)
		return ext4_insert_range(inode, offset, len);

	if (mode & FALLOC_FL_ZERO_RANGE)
		return ext4_zero_range(file, offset, len, mode);

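From userspace the new mode is reached through fallocate(2). A minimal sketch (the path and sizes are placeholders; offset and length must be multiples of the filesystem block size, as ext4_insert_range() enforces below):

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/tmp/testfile", O_RDWR);	/* placeholder path */

	if (fd < 0)
		return 1;
	/* Insert a 4 KiB hole at offset 4096, shifting later data right;
	 * both values must be block-aligned or the kernel returns EINVAL. */
	if (fallocate(fd, FALLOC_FL_INSERT_RANGE, 4096, 4096) < 0)
		perror("fallocate");
	close(fd);
	return 0;
}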
@@ -5224,13 +5253,13 @@ ext4_access_path(handle_t *handle, struct inode *inode,
/*
 * ext4_ext_shift_path_extents:
 * Shift the extents of a path structure lying between path[depth].p_ext
 * and EXT_LAST_EXTENT(path[depth].p_hdr) downwards, by subtracting shift
 * from starting block for each extent.
 * and EXT_LAST_EXTENT(path[depth].p_hdr), by @shift blocks. @SHIFT tells
 * if it is right shift or left shift operation.
 */
static int
ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
			    struct inode *inode, handle_t *handle,
			    ext4_lblk_t *start)
			    enum SHIFT_DIRECTION SHIFT)
{
	int depth, err = 0;
	struct ext4_extent *ex_start, *ex_last;
@@ -5252,19 +5281,25 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
		if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
			update = 1;

		*start = le32_to_cpu(ex_last->ee_block) +
			 ext4_ext_get_actual_len(ex_last);

		while (ex_start <= ex_last) {
			le32_add_cpu(&ex_start->ee_block, -shift);
			/* Try to merge to the left. */
			if ((ex_start >
			     EXT_FIRST_EXTENT(path[depth].p_hdr)) &&
			    ext4_ext_try_to_merge_right(inode,
							path, ex_start - 1))
			if (SHIFT == SHIFT_LEFT) {
				le32_add_cpu(&ex_start->ee_block,
					     -shift);
				/* Try to merge to the left. */
				if ((ex_start >
				     EXT_FIRST_EXTENT(path[depth].p_hdr))
				    &&
				    ext4_ext_try_to_merge_right(inode,
								path, ex_start - 1))
					ex_last--;
				else
					ex_start++;
			} else {
				le32_add_cpu(&ex_last->ee_block, shift);
				ext4_ext_try_to_merge_right(inode, path,
							    ex_last);
				ex_last--;
			else
				ex_start++;
			}
		}
		err = ext4_ext_dirty(handle, inode, path + depth);
		if (err)
@@ -5279,7 +5314,10 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
		if (err)
			goto out;

		le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
		if (SHIFT == SHIFT_LEFT)
			le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
		else
			le32_add_cpu(&path[depth].p_idx->ei_block, shift);
		err = ext4_ext_dirty(handle, inode, path + depth);
		if (err)
			goto out;
@@ -5297,19 +5335,20 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,

/*
 * ext4_ext_shift_extents:
 * All the extents which lies in the range from start to the last allocated
 * block for the file are shifted downwards by shift blocks.
 * All the extents which lies in the range from @start to the last allocated
 * block for the @inode are shifted either towards left or right (depending
 * upon @SHIFT) by @shift blocks.
 * On success, 0 is returned, error otherwise.
 */
static int
ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
		       ext4_lblk_t start, ext4_lblk_t shift)
		       ext4_lblk_t start, ext4_lblk_t shift,
		       enum SHIFT_DIRECTION SHIFT)
{
	struct ext4_ext_path *path;
	int ret = 0, depth;
	struct ext4_extent *extent;
	ext4_lblk_t stop_block;
	ext4_lblk_t ex_start, ex_end;
	ext4_lblk_t stop, *iterator, ex_start, ex_end;

	/* Let path point to the last extent */
	path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
@@ -5321,58 +5360,84 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
	if (!extent)
		goto out;

	stop_block = le32_to_cpu(extent->ee_block) +
	stop = le32_to_cpu(extent->ee_block) +
			ext4_ext_get_actual_len(extent);

	/* Nothing to shift, if hole is at the end of file */
	if (start >= stop_block)
		goto out;
	/*
	 * In case of left shift, Don't start shifting extents until we make
	 * sure the hole is big enough to accommodate the shift.
	 */
	if (SHIFT == SHIFT_LEFT) {
		path = ext4_find_extent(inode, start - 1, &path, 0);
		if (IS_ERR(path))
			return PTR_ERR(path);
		depth = path->p_depth;
		extent = path[depth].p_ext;
		if (extent) {
			ex_start = le32_to_cpu(extent->ee_block);
			ex_end = le32_to_cpu(extent->ee_block) +
				ext4_ext_get_actual_len(extent);
		} else {
			ex_start = 0;
			ex_end = 0;
		}

	/*
	 * Don't start shifting extents until we make sure the hole is big
	 * enough to accomodate the shift.
	 */
	path = ext4_find_extent(inode, start - 1, &path, 0);
	if (IS_ERR(path))
		return PTR_ERR(path);
	depth = path->p_depth;
	extent = path[depth].p_ext;
	if (extent) {
		ex_start = le32_to_cpu(extent->ee_block);
		ex_end = le32_to_cpu(extent->ee_block) +
			ext4_ext_get_actual_len(extent);
	} else {
		ex_start = 0;
		ex_end = 0;
		if ((start == ex_start && shift > ex_start) ||
		    (shift > start - ex_end)) {
			ext4_ext_drop_refs(path);
			kfree(path);
			return -EINVAL;
		}
	}

	if ((start == ex_start && shift > ex_start) ||
	    (shift > start - ex_end))
		return -EINVAL;
	/*
	 * In case of left shift, iterator points to start and it is increased
	 * till we reach stop. In case of right shift, iterator points to stop
	 * and it is decreased till we reach start.
	 */
	if (SHIFT == SHIFT_LEFT)
		iterator = &start;
	else
		iterator = &stop;

	/* Its safe to start updating extents */
	while (start < stop_block) {
		path = ext4_find_extent(inode, start, &path, 0);
	while (start < stop) {
		path = ext4_find_extent(inode, *iterator, &path, 0);
		if (IS_ERR(path))
			return PTR_ERR(path);
		depth = path->p_depth;
		extent = path[depth].p_ext;
		if (!extent) {
			EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
					 (unsigned long) start);
					 (unsigned long) *iterator);
			return -EIO;
		}
		if (start > le32_to_cpu(extent->ee_block)) {
		if (SHIFT == SHIFT_LEFT && *iterator >
		    le32_to_cpu(extent->ee_block)) {
			/* Hole, move to the next extent */
			if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) {
				path[depth].p_ext++;
			} else {
				start = ext4_ext_next_allocated_block(path);
				*iterator = ext4_ext_next_allocated_block(path);
				continue;
			}
		}

		if (SHIFT == SHIFT_LEFT) {
			extent = EXT_LAST_EXTENT(path[depth].p_hdr);
			*iterator = le32_to_cpu(extent->ee_block) +
					ext4_ext_get_actual_len(extent);
		} else {
			extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
			*iterator = le32_to_cpu(extent->ee_block) > 0 ?
				le32_to_cpu(extent->ee_block) - 1 : 0;
			/* Update path extent in case we need to stop */
			while (le32_to_cpu(extent->ee_block) < start)
				extent++;
			path[depth].p_ext = extent;
		}
		ret = ext4_ext_shift_path_extents(path, shift, inode,
						  handle, &start);
						  handle, SHIFT);
		if (ret)
			break;
	}
@@ -5485,7 +5550,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
	ext4_discard_preallocations(inode);

	ret = ext4_ext_shift_extents(inode, handle, punch_stop,
				     punch_stop - punch_start);
				     punch_stop - punch_start, SHIFT_LEFT);
	if (ret) {
		up_write(&EXT4_I(inode)->i_data_sem);
		goto out_stop;
@@ -5510,6 +5575,174 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
	return ret;
}

/*
 * ext4_insert_range:
 * This function implements the FALLOC_FL_INSERT_RANGE flag of fallocate.
 * The data blocks starting from @offset to the EOF are shifted by @len
 * towards right to create a hole in the @inode. Inode size is increased
 * by len bytes.
 * Returns 0 on success, error otherwise.
 */
int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
{
	struct super_block *sb = inode->i_sb;
	handle_t *handle;
	struct ext4_ext_path *path;
	struct ext4_extent *extent;
	ext4_lblk_t offset_lblk, len_lblk, ee_start_lblk = 0;
	unsigned int credits, ee_len;
	int ret = 0, depth, split_flag = 0;
	loff_t ioffset;

	/*
	 * We need to test this early because xfstests assumes that an
	 * insert range of (0, 1) will return EOPNOTSUPP if the file
	 * system does not support insert range.
	 */
	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
		return -EOPNOTSUPP;

	/* Insert range works only on fs block size aligned offsets. */
	if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) ||
			len & (EXT4_CLUSTER_SIZE(sb) - 1))
		return -EINVAL;

	if (!S_ISREG(inode->i_mode))
		return -EOPNOTSUPP;

	trace_ext4_insert_range(inode, offset, len);

	offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb);
	len_lblk = len >> EXT4_BLOCK_SIZE_BITS(sb);

	/* Call ext4_force_commit to flush all data in case of data=journal */
	if (ext4_should_journal_data(inode)) {
		ret = ext4_force_commit(inode->i_sb);
		if (ret)
			return ret;
	}

	/*
	 * Need to round down to align start offset to page size boundary
	 * for page size > block size.
	 */
	ioffset = round_down(offset, PAGE_SIZE);

	/* Write out all dirty pages */
	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
			LLONG_MAX);
	if (ret)
		return ret;

	/* Take mutex lock */
	mutex_lock(&inode->i_mutex);

	/* Currently just for extent based files */
	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
		ret = -EOPNOTSUPP;
		goto out_mutex;
	}

	/* Check for wrap through zero */
	if (inode->i_size + len > inode->i_sb->s_maxbytes) {
		ret = -EFBIG;
		goto out_mutex;
	}

	/* Offset should be less than i_size */
	if (offset >= i_size_read(inode)) {
		ret = -EINVAL;
		goto out_mutex;
	}

	truncate_pagecache(inode, ioffset);

	/* Wait for existing dio to complete */
	ext4_inode_block_unlocked_dio(inode);
	inode_dio_wait(inode);

	credits = ext4_writepage_trans_blocks(inode);
	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out_dio;
	}

	/* Expand file to avoid data loss if there is error while shifting */
	inode->i_size += len;
	EXT4_I(inode)->i_disksize += len;
	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
	ret = ext4_mark_inode_dirty(handle, inode);
	if (ret)
		goto out_stop;

	down_write(&EXT4_I(inode)->i_data_sem);
	ext4_discard_preallocations(inode);

	path = ext4_find_extent(inode, offset_lblk, NULL, 0);
	if (IS_ERR(path)) {
		up_write(&EXT4_I(inode)->i_data_sem);
		goto out_stop;
	}

	depth = ext_depth(inode);
	extent = path[depth].p_ext;
	if (extent) {
		ee_start_lblk = le32_to_cpu(extent->ee_block);
		ee_len = ext4_ext_get_actual_len(extent);

		/*
		 * If offset_lblk is not the starting block of extent, split
		 * the extent @offset_lblk
		 */
		if ((offset_lblk > ee_start_lblk) &&
				(offset_lblk < (ee_start_lblk + ee_len))) {
			if (ext4_ext_is_unwritten(extent))
				split_flag = EXT4_EXT_MARK_UNWRIT1 |
					EXT4_EXT_MARK_UNWRIT2;
			ret = ext4_split_extent_at(handle, inode, &path,
					offset_lblk, split_flag,
					EXT4_EX_NOCACHE |
					EXT4_GET_BLOCKS_PRE_IO |
					EXT4_GET_BLOCKS_METADATA_NOFAIL);
		}

		ext4_ext_drop_refs(path);
		kfree(path);
		if (ret < 0) {
			up_write(&EXT4_I(inode)->i_data_sem);
			goto out_stop;
		}
	}

	ret = ext4_es_remove_extent(inode, offset_lblk,
			EXT_MAX_BLOCKS - offset_lblk);
	if (ret) {
		up_write(&EXT4_I(inode)->i_data_sem);
		goto out_stop;
	}

	/*
	 * if offset_lblk lies in a hole which is at start of file, use
	 * ee_start_lblk to shift extents
	 */
	ret = ext4_ext_shift_extents(inode, handle,
		ee_start_lblk > offset_lblk ? ee_start_lblk : offset_lblk,
		len_lblk, SHIFT_RIGHT);

	up_write(&EXT4_I(inode)->i_data_sem);
	if (IS_SYNC(inode))
		ext4_handle_sync(handle);

out_stop:
	ext4_journal_stop(handle);
out_dio:
	ext4_inode_resume_unlocked_dio(inode);
out_mutex:
	mutex_unlock(&inode->i_mutex);
	return ret;
}

/**
 * ext4_swap_extents - Swap extents between two inodes
 *
@@ -5542,7 +5775,7 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
	BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
	BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
	BUG_ON(!mutex_is_locked(&inode1->i_mutex));
	BUG_ON(!mutex_is_locked(&inode1->i_mutex));
	BUG_ON(!mutex_is_locked(&inode2->i_mutex));

	*erp = ext4_es_remove_extent(inode1, lblk1, count);
	if (unlikely(*erp))
fs/ext4/file.c
@@ -223,9 +223,11 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
	struct inode *inode = file->f_mapping->host;

	if (ext4_encrypted_inode(inode)) {
		int err = ext4_generate_encryption_key(inode);
		int err = ext4_get_encryption_info(inode);
		if (err)
			return 0;
		if (ext4_encryption_info(inode) == NULL)
			return -ENOKEY;
	}
	file_accessed(file);
	if (IS_DAX(file_inode(file))) {
@@ -278,6 +280,13 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
			ext4_journal_stop(handle);
		}
	}
	if (ext4_encrypted_inode(inode)) {
		ret = ext4_get_encryption_info(inode);
		if (ret)
			return -EACCES;
		if (ext4_encryption_info(inode) == NULL)
			return -ENOKEY;
	}
	/*
	 * Set up the jbd2_inode if we are opening the inode for
	 * writing and the journal is present
@@ -287,13 +296,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
		if (ret < 0)
			return ret;
	}
	ret = dquot_file_open(inode, filp);
	if (!ret && ext4_encrypted_inode(inode)) {
		ret = ext4_generate_encryption_key(inode);
		if (ret)
			ret = -EACCES;
	}
	return ret;
	return dquot_file_open(inode, filp);
}

/*
fs/ext4/ialloc.c
@@ -726,11 +726,25 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
	ext4_group_t i;
	ext4_group_t flex_group;
	struct ext4_group_info *grp;
	int encrypt = 0;

	/* Cannot create files in a deleted directory */
	if (!dir || !dir->i_nlink)
		return ERR_PTR(-EPERM);

	if ((ext4_encrypted_inode(dir) ||
	     DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) &&
	    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
		err = ext4_get_encryption_info(dir);
		if (err)
			return ERR_PTR(err);
		if (ext4_encryption_info(dir) == NULL)
			return ERR_PTR(-EPERM);
		if (!handle)
			nblocks += EXT4_DATA_TRANS_BLOCKS(dir->i_sb);
		encrypt = 1;
	}

	sb = dir->i_sb;
	ngroups = ext4_get_groups_count(sb);
	trace_ext4_request_inode(dir, mode);
@@ -996,12 +1010,6 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
	ei->i_block_group = group;
	ei->i_last_alloc_group = ~0;

	/* If the directory encrypted, then we should encrypt the inode. */
	if ((S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) &&
	    (ext4_encrypted_inode(dir) ||
	     DUMMY_ENCRYPTION_ENABLED(sbi)))
		ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);

	ext4_set_inode_flags(inode);
	if (IS_DIRSYNC(inode))
		ext4_handle_sync(handle);
@@ -1034,28 +1042,9 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
	ext4_set_inode_state(inode, EXT4_STATE_NEW);

	ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
#ifdef CONFIG_EXT4_FS_ENCRYPTION
	if ((sbi->s_file_encryption_mode == EXT4_ENCRYPTION_MODE_INVALID) &&
	    (sbi->s_dir_encryption_mode == EXT4_ENCRYPTION_MODE_INVALID)) {
		ei->i_inline_off = 0;
		if (EXT4_HAS_INCOMPAT_FEATURE(sb,
			EXT4_FEATURE_INCOMPAT_INLINE_DATA))
			ext4_set_inode_state(inode,
			EXT4_STATE_MAY_INLINE_DATA);
	} else {
		/* Inline data and encryption are incompatible
		 * We turn off inline data since encryption is enabled */
		ei->i_inline_off = 1;
		if (EXT4_HAS_INCOMPAT_FEATURE(sb,
			EXT4_FEATURE_INCOMPAT_INLINE_DATA))
			ext4_clear_inode_state(inode,
			EXT4_STATE_MAY_INLINE_DATA);
	}
#else
	ei->i_inline_off = 0;
	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_INLINE_DATA))
		ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
#endif
	ret = inode;
	err = dquot_alloc_inode(inode);
	if (err)
@@ -1082,6 +1071,12 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
		ei->i_datasync_tid = handle->h_transaction->t_tid;
	}

	if (encrypt) {
		err = ext4_inherit_context(dir, inode);
		if (err)
			goto fail_free_drop;
	}

	err = ext4_mark_inode_dirty(handle, inode);
	if (err) {
		ext4_std_error(sb, err);
fs/ext4/indirect.c
@@ -565,7 +565,7 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
				       EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
		EXT4_ERROR_INODE(inode, "Can't allocate blocks for "
				 "non-extent mapped inodes with bigalloc");
		return -ENOSPC;
		return -EUCLEAN;
	}

	/* Set up for the direct block allocation */
@@ -576,6 +576,8 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
	ar.flags = EXT4_MB_HINT_DATA;
	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
		ar.flags |= EXT4_MB_DELALLOC_RESERVED;
	if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
		ar.flags |= EXT4_MB_USE_RESERVED;

	ar.goal = ext4_find_goal(inode, map->m_lblk, partial);

fs/ext4/inline.c
@@ -995,20 +995,18 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
 * and -EEXIST if directory entry already exists.
 */
static int ext4_add_dirent_to_inline(handle_t *handle,
				     struct ext4_filename *fname,
				     struct dentry *dentry,
				     struct inode *inode,
				     struct ext4_iloc *iloc,
				     void *inline_start, int inline_size)
{
	struct inode	*dir = d_inode(dentry->d_parent);
	const char	*name = dentry->d_name.name;
	int		namelen = dentry->d_name.len;
	int		err;
	struct ext4_dir_entry_2 *de;

	err = ext4_find_dest_de(dir, inode, iloc->bh,
				inline_start, inline_size,
				name, namelen, &de);
	err = ext4_find_dest_de(dir, inode, iloc->bh, inline_start,
				inline_size, fname, &de);
	if (err)
		return err;

@@ -1016,8 +1014,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
	err = ext4_journal_get_write_access(handle, iloc->bh);
	if (err)
		return err;
	ext4_insert_dentry(dir, inode, de, inline_size, &dentry->d_name,
			   name, namelen);
	ext4_insert_dentry(dir, inode, de, inline_size, fname);

	ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size);

@@ -1248,8 +1245,8 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
 * If succeeds, return 0. If not, extended the inline dir and copied data to
 * the new created block.
 */
int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
			      struct inode *inode)
int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
			      struct dentry *dentry, struct inode *inode)
{
	int ret, inline_size;
	void *inline_start;
@@ -1268,7 +1265,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
			EXT4_INLINE_DOTDOT_SIZE;
	inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;

	ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc,
	ret = ext4_add_dirent_to_inline(handle, fname, dentry, inode, &iloc,
					inline_start, inline_size);
	if (ret != -ENOSPC)
		goto out;
@@ -1289,8 +1286,9 @@ int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
	if (inline_size) {
		inline_start = ext4_get_inline_xattr_pos(dir, &iloc);

		ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc,
						inline_start, inline_size);
		ret = ext4_add_dirent_to_inline(handle, fname, dentry,
						inode, &iloc, inline_start,
						inline_size);

		if (ret != -ENOSPC)
			goto out;
@@ -1611,6 +1609,7 @@ int ext4_try_create_inline_dir(handle_t *handle, struct inode *parent,
}

struct buffer_head *ext4_find_inline_entry(struct inode *dir,
					struct ext4_filename *fname,
					const struct qstr *d_name,
					struct ext4_dir_entry_2 **res_dir,
					int *has_inline_data)
@@ -1632,8 +1631,8 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir,
	inline_start = (void *)ext4_raw_inode(&iloc)->i_block +
						EXT4_INLINE_DOTDOT_SIZE;
	inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
	ret = search_dir(iloc.bh, inline_start, inline_size,
			 dir, d_name, 0, res_dir);
	ret = ext4_search_dir(iloc.bh, inline_start, inline_size,
			      dir, fname, d_name, 0, res_dir);
	if (ret == 1)
		goto out_find;
	if (ret < 0)
@@ -1645,8 +1644,8 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir,
	inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
	inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE;

	ret = search_dir(iloc.bh, inline_start, inline_size,
			 dir, d_name, 0, res_dir);
	ret = ext4_search_dir(iloc.bh, inline_start, inline_size,
			      dir, fname, d_name, 0, res_dir);
	if (ret == 1)
		goto out_find;

fs/ext4/inode.c
@@ -731,18 +731,18 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
 * `handle' can be NULL if create is zero
 */
struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
				ext4_lblk_t block, int create)
				ext4_lblk_t block, int map_flags)
{
	struct ext4_map_blocks map;
	struct buffer_head *bh;
	int create = map_flags & EXT4_GET_BLOCKS_CREATE;
	int err;

	J_ASSERT(handle != NULL || create == 0);

	map.m_lblk = block;
	map.m_len = 1;
	err = ext4_map_blocks(handle, inode, &map,
			      create ? EXT4_GET_BLOCKS_CREATE : 0);
	err = ext4_map_blocks(handle, inode, &map, map_flags);

	if (err == 0)
		return create ? ERR_PTR(-ENOSPC) : NULL;
@@ -788,11 +788,11 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
}

struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
			       ext4_lblk_t block, int create)
			       ext4_lblk_t block, int map_flags)
{
	struct buffer_head *bh;

	bh = ext4_getblk(handle, inode, block, create);
	bh = ext4_getblk(handle, inode, block, map_flags);
	if (IS_ERR(bh))
		return bh;
	if (!bh || buffer_uptodate(bh))
@@ -1261,13 +1261,12 @@ static int ext4_journalled_write_end(struct file *file,
}

/*
 * Reserve a single cluster located at lblock
 * Reserve space for a single cluster
 */
static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
static int ext4_da_reserve_space(struct inode *inode)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);
	unsigned int md_needed;
	int ret;

	/*
@@ -1279,25 +1278,14 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
	if (ret)
		return ret;

	/*
	 * recalculate the amount of metadata blocks to reserve
	 * in order to allocate nrblocks
	 * worse case is one extent per block
	 */
	spin_lock(&ei->i_block_reservation_lock);
	/*
	 * ext4_calc_metadata_amount() has side effects, which we have
	 * to be prepared undo if we fail to claim space.
	 */
	md_needed = 0;
	trace_ext4_da_reserve_space(inode, 0);

	if (ext4_claim_free_clusters(sbi, 1, 0)) {
		spin_unlock(&ei->i_block_reservation_lock);
		dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
		return -ENOSPC;
	}
	ei->i_reserved_data_blocks++;
	trace_ext4_da_reserve_space(inode);
	spin_unlock(&ei->i_block_reservation_lock);

	return 0;       /* success */
@@ -1566,9 +1554,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
	 * then we don't need to reserve it again. However we still need
	 * to reserve metadata for every block we're going to write.
	 */
	if (EXT4_SB(inode->i_sb)->s_cluster_ratio <= 1 ||
	if (EXT4_SB(inode->i_sb)->s_cluster_ratio == 1 ||
	    !ext4_find_delalloc_cluster(inode, map->m_lblk)) {
		ret = ext4_da_reserve_space(inode, iblock);
		ret = ext4_da_reserve_space(inode);
		if (ret) {
			/* not enough space to reserve */
			retval = ret;
@@ -1701,19 +1689,32 @@ static int __ext4_journalled_writepage(struct page *page,
		ext4_walk_page_buffers(handle, page_bufs, 0, len,
				       NULL, bget_one);
	}
	/* As soon as we unlock the page, it can go away, but we have
	 * references to buffers so we are safe */
	/*
	 * We need to release the page lock before we start the
	 * journal, so grab a reference so the page won't disappear
	 * out from under us.
	 */
	get_page(page);
	unlock_page(page);

	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
				    ext4_writepage_trans_blocks(inode));
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		put_page(page);
		goto out_no_pagelock;
	}
	BUG_ON(!ext4_handle_valid(handle));

	lock_page(page);
	put_page(page);
	if (page->mapping != mapping) {
		/* The page got truncated from under us */
		ext4_journal_stop(handle);
		ret = 0;
		goto out;
	}

	BUG_ON(!ext4_handle_valid(handle));

	if (inline_data) {
		BUFFER_TRACE(inode_bh, "get write access");
		ret = ext4_journal_get_write_access(handle, inode_bh);
@@ -1739,6 +1740,8 @@ static int __ext4_journalled_writepage(struct page *page,
				       NULL, bput_one);
	ext4_set_inode_state(inode, EXT4_STATE_JDATA);
out:
	unlock_page(page);
out_no_pagelock:
	brelse(inode_bh);
	return ret;
}
@@ -4681,8 +4684,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
		ext4_journal_stop(handle);
	}

	if (attr->ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
	if (attr->ia_valid & ATTR_SIZE) {
		handle_t *handle;
		loff_t oldsize = inode->i_size;
		int shrink = (attr->ia_size <= inode->i_size);

		if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
			struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -4690,24 +4695,26 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
			if (attr->ia_size > sbi->s_bitmap_maxbytes)
				return -EFBIG;
		}
		if (!S_ISREG(inode->i_mode))
			return -EINVAL;

		if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size)
			inode_inc_iversion(inode);

		if (S_ISREG(inode->i_mode) &&
		if (ext4_should_order_data(inode) &&
		    (attr->ia_size < inode->i_size)) {
			if (ext4_should_order_data(inode)) {
				error = ext4_begin_ordered_truncate(inode,
			error = ext4_begin_ordered_truncate(inode,
							    attr->ia_size);
				if (error)
					goto err_out;
			}
			if (error)
				goto err_out;
		}
		if (attr->ia_size != inode->i_size) {
			handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
			if (IS_ERR(handle)) {
				error = PTR_ERR(handle);
				goto err_out;
			}
			if (ext4_handle_valid(handle)) {
			if (ext4_handle_valid(handle) && shrink) {
				error = ext4_orphan_add(handle, inode);
				orphan = 1;
			}
@@ -4726,15 +4733,13 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
			up_write(&EXT4_I(inode)->i_data_sem);
			ext4_journal_stop(handle);
			if (error) {
				ext4_orphan_del(NULL, inode);
				if (orphan)
					ext4_orphan_del(NULL, inode);
				goto err_out;
			}
		} else {
			loff_t oldsize = inode->i_size;

			i_size_write(inode, attr->ia_size);
			pagecache_isize_extended(inode, oldsize, inode->i_size);
		}
		if (!shrink)
			pagecache_isize_extended(inode, oldsize, inode->i_size);

		/*
		 * Blocks are going to be removed from the inode. Wait
@@ -4754,13 +4759,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
		 * in data=journal mode to make pages freeable.
		 */
		truncate_pagecache(inode, inode->i_size);
		if (shrink)
			ext4_truncate(inode);
	}
	/*
	 * We want to call ext4_truncate() even if attr->ia_size ==
	 * inode->i_size for cases like truncation of fallocated space
	 */
	if (attr->ia_valid & ATTR_SIZE)
		ext4_truncate(inode);

	if (!rc) {
		setattr_copy(inode, attr);
fs/ext4/ioctl.c
@@ -31,14 +31,11 @@
static void memswap(void *a, void *b, size_t len)
{
	unsigned char *ap, *bp;
	unsigned char tmp;

	ap = (unsigned char *)a;
	bp = (unsigned char *)b;
	while (len-- > 0) {
		tmp = *ap;
		*ap = *bp;
		*bp = tmp;
		swap(*ap, *bp);
		ap++;
		bp++;
	}
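The open-coded three-line exchange is replaced with the kernel's swap() helper. For reference, a minimal sketch of that macro as commonly defined in include/linux/kernel.h of this era (paraphrased here, not part of the diff):

/* Exchange two values of any matching type via a typed temporary. */
#define swap(a, b) \
	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)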
@@ -675,8 +672,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
			if (err)
				return err;
		}
		if (copy_to_user((void *) arg, sbi->s_es->s_encrypt_pw_salt,
				 16))
		if (copy_to_user((void __user *) arg,
				 sbi->s_es->s_encrypt_pw_salt, 16))
			return -EFAULT;
		return 0;
	}
@@ -690,7 +687,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
		err = ext4_get_policy(inode, &policy);
		if (err)
			return err;
		if (copy_to_user((void *)arg, &policy, sizeof(policy)))
		if (copy_to_user((void __user *)arg, &policy, sizeof(policy)))
			return -EFAULT;
		return 0;
#else
fs/ext4/mballoc.c
@@ -882,10 +882,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore)

	/* wait for I/O completion */
	for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
		if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i])) {
		if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i]))
			err = -EIO;
			goto out;
		}
	}

	first_block = page->index * blocks_per_page;
@@ -898,6 +896,11 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
			/* skip initialized uptodate buddy */
			continue;

		if (!buffer_verified(bh[group - first_group]))
			/* Skip faulty bitmaps */
			continue;
		err = 0;

		/*
		 * data carry information regarding this
		 * particular group in the format specified
@@ -2008,7 +2011,12 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
	}
}

/* This is now called BEFORE we load the buddy bitmap. */
/*
 * This is now called BEFORE we load the buddy bitmap.
 * Returns either 1 or 0 indicating that the group is either suitable
 * for the allocation or not. In addition it can also return negative
 * error code when something goes wrong.
 */
static int ext4_mb_good_group(struct ext4_allocation_context *ac,
				ext4_group_t group, int cr)
{
@@ -2031,7 +2039,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
		int ret = ext4_mb_init_group(ac->ac_sb, group);
		if (ret)
			return 0;
			return ret;
	}

	fragments = grp->bb_fragments;
@@ -2078,7 +2086,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
	ext4_group_t ngroups, group, i;
	int cr;
	int err = 0;
	int err = 0, first_err = 0;
	struct ext4_sb_info *sbi;
	struct super_block *sb;
	struct ext4_buddy e4b;
@@ -2145,6 +2153,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
		group = ac->ac_g_ex.fe_group;

		for (i = 0; i < ngroups; group++, i++) {
			int ret = 0;
			cond_resched();
			/*
			 * Artificially restricted ngroups for non-extent
@@ -2154,8 +2163,12 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
				group = 0;

			/* This now checks without needing the buddy page */
			if (!ext4_mb_good_group(ac, group, cr))
			ret = ext4_mb_good_group(ac, group, cr);
			if (ret <= 0) {
				if (!first_err)
					first_err = ret;
				continue;
			}

			err = ext4_mb_load_buddy(sb, group, &e4b);
			if (err)
@@ -2167,9 +2180,12 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
			 * We need to check again after locking the
			 * block group
			 */
			if (!ext4_mb_good_group(ac, group, cr)) {
			ret = ext4_mb_good_group(ac, group, cr);
			if (ret <= 0) {
				ext4_unlock_group(sb, group);
				ext4_mb_unload_buddy(&e4b);
				if (!first_err)
					first_err = ret;
				continue;
			}

@@ -2216,6 +2232,8 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
		}
	}
out:
	if (!err && ac->ac_status != AC_STATUS_FOUND && first_err)
		err = first_err;
	return err;
}

@@ -2257,12 +2275,9 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)

	group--;
	if (group == 0)
		seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
			   "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
			   "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
			   "group", "free", "frags", "first",
			   "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
			   "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");
		seq_puts(seq, "#group: free  frags first ["
			      " 2^0   2^1   2^2   2^3   2^4   2^5   2^6 "
			      " 2^7   2^8   2^9   2^10  2^11  2^12  2^13  ]");

	i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
		sizeof(struct ext4_group_info);
fs/ext4/move_extent.c
@@ -166,12 +166,9 @@ mext_page_double_lock(struct inode *inode1, struct inode *inode2,
	 */
	wait_on_page_writeback(page[0]);
	wait_on_page_writeback(page[1]);
	if (inode1 > inode2) {
		struct page *tmp;
		tmp = page[0];
		page[0] = page[1];
		page[1] = tmp;
	}
	if (inode1 > inode2)
		swap(page[0], page[1]);

	return 0;
}

@@ -574,12 +571,16 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
			orig_inode->i_ino, donor_inode->i_ino);
		return -EINVAL;
	}
	/* TODO: This is non obvious task to swap blocks for inodes with full
	   jornaling enabled */

	/* TODO: it's not obvious how to swap blocks for inodes with full
	   journaling enabled */
	if (ext4_should_journal_data(orig_inode) ||
	    ext4_should_journal_data(donor_inode)) {
		return -EINVAL;
		ext4_msg(orig_inode->i_sb, KERN_ERR,
			 "Online defrag not supported with data journaling");
		return -EOPNOTSUPP;
	}

	/* Protect orig and donor inodes against a truncate */
	lock_two_nondirectories(orig_inode, donor_inode);

fs/ext4/namei.c
(542 changed lines; file diff suppressed because it is too large)
fs/ext4/page-io.c
@@ -84,7 +84,7 @@ static void ext4_finish_bio(struct bio *bio)
			/* The bounce data pages are unmapped. */
			data_page = page;
			ctx = (struct ext4_crypto_ctx *)page_private(data_page);
			page = ctx->control_page;
			page = ctx->w.control_page;
		}
#endif

fs/ext4/readpage.c
@@ -54,8 +54,8 @@ static void completion_pages(struct work_struct *work)
{
#ifdef CONFIG_EXT4_FS_ENCRYPTION
	struct ext4_crypto_ctx *ctx =
		container_of(work, struct ext4_crypto_ctx, work);
	struct bio	*bio = ctx->bio;
		container_of(work, struct ext4_crypto_ctx, r.work);
	struct bio	*bio = ctx->r.bio;
	struct bio_vec	*bv;
	int		i;

@@ -109,9 +109,9 @@ static void mpage_end_io(struct bio *bio, int err)
		if (err) {
			ext4_release_crypto_ctx(ctx);
		} else {
			INIT_WORK(&ctx->work, completion_pages);
			ctx->bio = bio;
			queue_work(ext4_read_workqueue, &ctx->work);
			INIT_WORK(&ctx->r.work, completion_pages);
			ctx->r.bio = bio;
			queue_work(ext4_read_workqueue, &ctx->r.work);
			return;
		}
	}
@ -591,14 +591,17 @@ void __ext4_msg(struct super_block *sb,
|
|||
va_end(args);
|
||||
}
|
||||
|
||||
#define ext4_warning_ratelimit(sb) \
|
||||
___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), \
|
||||
"EXT4-fs warning")
|
||||
|
||||
void __ext4_warning(struct super_block *sb, const char *function,
|
||||
unsigned int line, const char *fmt, ...)
|
||||
{
|
||||
struct va_format vaf;
|
||||
va_list args;
|
||||
|
||||
if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
|
||||
"EXT4-fs warning"))
|
||||
if (!ext4_warning_ratelimit(sb))
|
||||
return;
|
||||
|
||||
va_start(args, fmt);
|
||||
|
@ -609,6 +612,24 @@ void __ext4_warning(struct super_block *sb, const char *function,
|
|||
va_end(args);
|
||||
}
|
||||
|
||||
void __ext4_warning_inode(const struct inode *inode, const char *function,
|
||||
unsigned int line, const char *fmt, ...)
|
||||
{
|
||||
struct va_format vaf;
|
||||
va_list args;
|
||||
|
||||
if (!ext4_warning_ratelimit(inode->i_sb))
|
||||
return;
|
||||
|
||||
va_start(args, fmt);
|
||||
vaf.fmt = fmt;
|
||||
vaf.va = &args;
|
||||
printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
|
||||
"inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
|
||||
function, line, inode->i_ino, current->comm, &vaf);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
void __ext4_grp_locked_error(const char *function, unsigned int line,
|
||||
struct super_block *sb, ext4_group_t grp,
|
||||
unsigned long ino, ext4_fsblk_t block,
|
||||
|
@ -807,6 +828,7 @@ static void ext4_put_super(struct super_block *sb)
|
|||
dump_orphan_list(sb, sbi);
|
||||
J_ASSERT(list_empty(&sbi->s_orphan));
|
||||
|
||||
sync_blockdev(sb->s_bdev);
|
||||
invalidate_bdev(sb->s_bdev);
|
||||
if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
|
||||
/*
|
||||
|
@@ -879,9 +901,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 	atomic_set(&ei->i_unwritten, 0);
 	INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
-	ei->i_encryption_key.mode = EXT4_ENCRYPTION_MODE_INVALID;
+	ei->i_crypt_info = NULL;
 #endif
 
 	return &ei->vfs_inode;
 }
 
@@ -958,6 +979,10 @@ void ext4_clear_inode(struct inode *inode)
 		jbd2_free_inode(EXT4_I(inode)->jinode);
 		EXT4_I(inode)->jinode = NULL;
 	}
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+	if (EXT4_I(inode)->i_crypt_info)
+		ext4_free_encryption_info(inode, EXT4_I(inode)->i_crypt_info);
+#endif
 }
 
 static struct inode *ext4_nfs_get_inode(struct super_block *sb,
@@ -3449,11 +3474,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	if (sb->s_bdev->bd_part)
 		sbi->s_sectors_written_start =
 			part_stat_read(sb->s_bdev->bd_part, sectors[1]);
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
-	/* Modes of operations for file and directory encryption. */
-	sbi->s_file_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS;
-	sbi->s_dir_encryption_mode = EXT4_ENCRYPTION_MODE_INVALID;
-#endif
 
 	/* Cleanup superblock name */
 	for (cp = sb->s_id; (cp = strchr(cp, '/'));)
@@ -4067,7 +4087,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		}
 	}
 
-	if (unlikely(sbi->s_mount_flags & EXT4_MF_TEST_DUMMY_ENCRYPTION) &&
+	if ((DUMMY_ENCRYPTION_ENABLED(sbi) ||
+	     EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT)) &&
+	    (blocksize != PAGE_CACHE_SIZE)) {
+		ext4_msg(sb, KERN_ERR,
+			 "Unsupported blocksize for fs encryption");
+		goto failed_mount_wq;
+	}
+
+	if (DUMMY_ENCRYPTION_ENABLED(sbi) &&
 	    !(sb->s_flags & MS_RDONLY) &&
 	    !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT)) {
 		EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT);
@@ -4943,6 +4971,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 		set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
 	}
 
+	if (*flags & MS_LAZYTIME)
+		sb->s_flags |= MS_LAZYTIME;
+
 	if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
 		if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
 			err = -EROFS;
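The remount hunk carries a requested lazytime flag over into sb->s_flags. For illustration, the userspace request that would exercise this path, assuming a libc that exposes MS_LAZYTIME (the mount point is made up):

    #include <stdio.h>
    #include <sys/mount.h>

    #ifndef MS_LAZYTIME
    #define MS_LAZYTIME (1 << 25)   /* fallback for older libc headers */
    #endif

    int main(void)
    {
        /* equivalent of: mount -o remount,lazytime /mnt */
        if (mount(NULL, "/mnt", NULL, MS_REMOUNT | MS_LAZYTIME, NULL)) {
            perror("mount");
            return 1;
        }
        return 0;
    }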
@@ -5410,6 +5441,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 	struct inode *inode = sb_dqopt(sb)->files[type];
 	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
 	int err, offset = off & (sb->s_blocksize - 1);
+	int retries = 0;
 	struct buffer_head *bh;
 	handle_t *handle = journal_current_handle();
 
@@ -5430,7 +5462,12 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 		return -EIO;
 	}
 
-	bh = ext4_bread(handle, inode, blk, 1);
+	do {
+		bh = ext4_bread(handle, inode, blk,
+				EXT4_GET_BLOCKS_CREATE |
+				EXT4_GET_BLOCKS_METADATA_NOFAIL);
+	} while (IS_ERR(bh) && (PTR_ERR(bh) == -ENOSPC) &&
+		 ext4_should_retry_alloc(inode->i_sb, &retries));
 	if (IS_ERR(bh))
 		return PTR_ERR(bh);
 	if (!bh)
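The do/while above turns one ext4_bread() attempt into a bounded retry on ENOSPC, on the theory that a committing transaction may release blocks that let the allocation succeed on a later pass. A runnable sketch of the idiom; try_alloc_block() and the retry budget are invented for illustration:

    #include <errno.h>
    #include <stdio.h>

    #define MAX_RETRIES 3

    static int should_retry_alloc(int *retries)
    {
        return (*retries)++ < MAX_RETRIES;  /* grant a bounded budget */
    }

    static int try_alloc_block(void)
    {
        static int calls;

        return ++calls < 3 ? -ENOSPC : 0;   /* succeeds on the 3rd try */
    }

    int main(void)
    {
        int retries = 0, err;

        do {
            err = try_alloc_block();
        } while (err == -ENOSPC && should_retry_alloc(&retries));

        printf("allocation %s after %d attempts\n",
               err ? "failed" : "succeeded", retries + 1);
        return err ? 1 : 0;
    }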
@@ -5647,6 +5684,7 @@ static int __init ext4_init_fs(void)
 
 static void __exit ext4_exit_fs(void)
 {
+	ext4_exit_crypto();
 	ext4_destroy_lazyinit_thread();
 	unregister_as_ext2();
 	unregister_as_ext3();
fs/ext4/symlink.c

@@ -23,31 +23,28 @@
 #include "xattr.h"
 
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
-static const char *ext4_follow_link(struct dentry *dentry, void **cookie)
+static const char *ext4_encrypted_follow_link(struct dentry *dentry, void **cookie)
 {
 	struct page *cpage = NULL;
 	char *caddr, *paddr = NULL;
 	struct ext4_str cstr, pstr;
 	struct inode *inode = d_inode(dentry);
-	struct ext4_fname_crypto_ctx *ctx = NULL;
 	struct ext4_encrypted_symlink_data *sd;
 	loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
 	int res;
 	u32 plen, max_size = inode->i_sb->s_blocksize;
 
-	ctx = ext4_get_fname_crypto_ctx(inode, inode->i_sb->s_blocksize);
-	if (IS_ERR(ctx))
-		return ERR_CAST(ctx);
+	res = ext4_get_encryption_info(inode);
+	if (res)
+		return ERR_PTR(res);
 
 	if (ext4_inode_is_fast_symlink(inode)) {
 		caddr = (char *) EXT4_I(inode)->i_data;
 		max_size = sizeof(EXT4_I(inode)->i_data);
 	} else {
 		cpage = read_mapping_page(inode->i_mapping, 0, NULL);
-		if (IS_ERR(cpage)) {
-			ext4_put_fname_crypto_ctx(&ctx);
+		if (IS_ERR(cpage))
 			return ERR_CAST(cpage);
-		}
 		caddr = kmap(cpage);
 		caddr[size] = 0;
 	}
@@ -71,20 +68,19 @@ static const char *ext4_follow_link(struct dentry *dentry, void **cookie)
 		goto errout;
 	}
 	pstr.name = paddr;
-	res = _ext4_fname_disk_to_usr(ctx, NULL, &cstr, &pstr);
+	pstr.len = plen;
+	res = _ext4_fname_disk_to_usr(inode, NULL, &cstr, &pstr);
 	if (res < 0)
 		goto errout;
 	/* Null-terminate the name */
 	if (res <= plen)
 		paddr[res] = '\0';
-	ext4_put_fname_crypto_ctx(&ctx);
 	if (cpage) {
 		kunmap(cpage);
 		page_cache_release(cpage);
 	}
 	return *cookie = paddr;
 errout:
-	ext4_put_fname_crypto_ctx(&ctx);
 	if (cpage) {
 		kunmap(cpage);
 		page_cache_release(cpage);
@@ -95,7 +91,7 @@ static const char *ext4_follow_link(struct dentry *dentry, void **cookie)
 
 const struct inode_operations ext4_encrypted_symlink_inode_operations = {
 	.readlink	= generic_readlink,
-	.follow_link	= ext4_follow_link,
+	.follow_link	= ext4_encrypted_follow_link,
 	.put_link	= kfree_put_link,
 	.setattr	= ext4_setattr,
 	.setxattr	= generic_setxattr,
fs/jbd2/checkpoint.c

@@ -390,7 +390,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
 	unsigned long	blocknr;
 
 	if (is_journal_aborted(journal))
-		return 1;
+		return -EIO;
 
 	if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
 		return 1;
@@ -405,10 +405,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
 	 * jbd2_cleanup_journal_tail() doesn't get called all that often.
 	 */
 	if (journal->j_flags & JBD2_BARRIER)
-		blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
+		blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL);
 
-	__jbd2_update_log_tail(journal, first_tid, blocknr);
-	return 0;
+	return __jbd2_update_log_tail(journal, first_tid, blocknr);
 }
 
 
fs/jbd2/journal.c

@@ -371,16 +371,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
 	 */
 	J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
 
-retry_alloc:
-	new_bh = alloc_buffer_head(GFP_NOFS);
-	if (!new_bh) {
-		/*
-		 * Failure is not an option, but __GFP_NOFAIL is going
-		 * away; so we retry ourselves here.
-		 */
-		congestion_wait(BLK_RW_ASYNC, HZ/50);
-		goto retry_alloc;
-	}
+	new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
 
 	/* keep subsequent assertions sane */
 	atomic_set(&new_bh->b_count, 1);
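Several patches in this pull replace open-coded "sleep and retry until the allocation succeeds" loops (here, and in journal_alloc_journal_head(), insert_revoke_hash() and start_this_handle() below) with __GFP_NOFAIL, so the allocator owns the must-not-fail retry policy. A runnable userspace analogue of what the removed loop did, centralized in one helper (names illustrative):

    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>

    /* Retry forever with a small back-off; the caller never sees NULL. */
    static void *xmalloc_nofail(size_t size)
    {
        void *p;

        while (!(p = malloc(size)))
            usleep(20 * 1000);  /* ~HZ/50, like congestion_wait() */
        return p;
    }

    int main(void)
    {
        char *buf = xmalloc_nofail(4096);

        memset(buf, 0, 4096);
        free(buf);
        return 0;
    }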
@@ -885,9 +876,10 @@ int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
  *
  * Requires j_checkpoint_mutex
  */
-void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
+int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
 {
 	unsigned long freed;
+	int ret;
 
 	BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
 
@@ -897,7 +889,10 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
 	 * space and if we lose sb update during power failure we'd replay
 	 * old transaction with possibly newly overwritten data.
 	 */
-	jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
+	ret = jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
+	if (ret)
+		goto out;
+
 	write_lock(&journal->j_state_lock);
 	freed = block - journal->j_tail;
 	if (block < journal->j_tail)
@@ -913,6 +908,9 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
 	journal->j_tail_sequence = tid;
 	journal->j_tail = block;
 	write_unlock(&journal->j_state_lock);
+
+out:
+	return ret;
 }
 
 /*
@@ -1331,7 +1329,7 @@ static int journal_reset(journal_t *journal)
 	return jbd2_journal_start_thread(journal);
 }
 
-static void jbd2_write_superblock(journal_t *journal, int write_op)
+static int jbd2_write_superblock(journal_t *journal, int write_op)
 {
 	struct buffer_head *bh = journal->j_sb_buffer;
 	journal_superblock_t *sb = journal->j_superblock;
@@ -1370,7 +1368,10 @@ static void jbd2_write_superblock(journal_t *journal, int write_op)
 		printk(KERN_ERR "JBD2: Error %d detected when updating "
 		       "journal superblock for %s.\n", ret,
 		       journal->j_devname);
+		jbd2_journal_abort(journal, ret);
 	}
+
+	return ret;
 }
 
 /**
@@ -1383,10 +1384,11 @@ static void jbd2_write_superblock(journal_t *journal, int write_op)
 * Update a journal's superblock information about log tail and write it to
 * disk, waiting for the IO to complete.
 */
-void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
+int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
 				     unsigned long tail_block, int write_op)
 {
 	journal_superblock_t *sb = journal->j_superblock;
+	int ret;
 
 	BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
 	jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
@@ -1395,13 +1397,18 @@ void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
 	sb->s_sequence = cpu_to_be32(tail_tid);
 	sb->s_start    = cpu_to_be32(tail_block);
 
-	jbd2_write_superblock(journal, write_op);
+	ret = jbd2_write_superblock(journal, write_op);
+	if (ret)
+		goto out;
 
 	/* Log is no longer empty */
 	write_lock(&journal->j_state_lock);
 	WARN_ON(!sb->s_sequence);
 	journal->j_flags &= ~JBD2_FLUSHED;
 	write_unlock(&journal->j_state_lock);
+
+out:
+	return ret;
 }
 
 /**
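This chain of void-to-int conversions is the core of the ocfs2 corruption fix: a failed journal superblock write now aborts the journal and propagates an error up through __jbd2_update_log_tail() and jbd2_cleanup_journal_tail() instead of being silently ignored while the in-memory tail advances. A runnable sketch of the propagation shape only; these functions are illustrative stand-ins, not the kernel ones:

    #include <errno.h>
    #include <stdio.h>

    static int write_superblock(void)
    {
        return -EIO;            /* pretend the block write failed */
    }

    static int update_sb_log_tail(void)
    {
        int ret = write_superblock();

        if (ret)
            return ret;         /* do NOT advance in-memory state */
        /* ... update the in-memory log tail only on success ... */
        return 0;
    }

    static int cleanup_journal_tail(void)
    {
        return update_sb_log_tail();    /* each layer hands the error up */
    }

    int main(void)
    {
        int err = cleanup_journal_tail();

        if (err)
            fprintf(stderr, "journal update failed: %d\n", err);
        return err ? 1 : 0;
    }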
@@ -1950,7 +1957,14 @@ int jbd2_journal_flush(journal_t *journal)
 		return -EIO;
 
 	mutex_lock(&journal->j_checkpoint_mutex);
-	jbd2_cleanup_journal_tail(journal);
+	if (!err) {
+		err = jbd2_cleanup_journal_tail(journal);
+		if (err < 0) {
+			mutex_unlock(&journal->j_checkpoint_mutex);
+			goto out;
+		}
+		err = 0;
+	}
 
 	/* Finally, mark the journal as really needing no recovery.
 	 * This sets s_start==0 in the underlying superblock, which is
@@ -1966,7 +1980,8 @@ int jbd2_journal_flush(journal_t *journal)
 	J_ASSERT(journal->j_head == journal->j_tail);
 	J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
 	write_unlock(&journal->j_state_lock);
-	return 0;
+out:
+	return err;
 }
 
 /**
@@ -2330,7 +2345,7 @@ static int jbd2_journal_init_journal_head_cache(void)
 	jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head",
 				sizeof(struct journal_head),
 				0,		/* offset */
-				SLAB_TEMPORARY,	/* flags */
+				SLAB_TEMPORARY | SLAB_DESTROY_BY_RCU,
 				NULL);		/* ctor */
 	retval = 0;
 	if (!jbd2_journal_head_cache) {
@@ -2362,10 +2377,8 @@ static struct journal_head *journal_alloc_journal_head(void)
 	if (!ret) {
 		jbd_debug(1, "out of memory for journal_head\n");
 		pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__);
-		while (!ret) {
-			yield();
-			ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS);
-		}
+		ret = kmem_cache_zalloc(jbd2_journal_head_cache,
+				GFP_NOFS | __GFP_NOFAIL);
 	}
 	return ret;
 }
fs/jbd2/revoke.c

@@ -141,11 +141,13 @@ static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr,
 {
 	struct list_head *hash_list;
 	struct jbd2_revoke_record_s *record;
+	gfp_t gfp_mask = GFP_NOFS;
 
-repeat:
-	record = kmem_cache_alloc(jbd2_revoke_record_cache, GFP_NOFS);
+	if (journal_oom_retry)
+		gfp_mask |= __GFP_NOFAIL;
+	record = kmem_cache_alloc(jbd2_revoke_record_cache, gfp_mask);
 	if (!record)
-		goto oom;
+		return -ENOMEM;
 
 	record->sequence = seq;
 	record->blocknr = blocknr;
@@ -154,13 +156,6 @@ static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr,
 	list_add(&record->hash, hash_list);
 	spin_unlock(&journal->j_revoke_lock);
 	return 0;
-
-oom:
-	if (!journal_oom_retry)
-		return -ENOMEM;
-	jbd_debug(1, "ENOMEM in %s, retrying\n", __func__);
-	yield();
-	goto repeat;
 }
 
 /* Find a revoke record in the journal's hash table. */
fs/jbd2/transaction.c

@@ -278,22 +278,16 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
 
 alloc_transaction:
 	if (!journal->j_running_transaction) {
+		/*
+		 * If __GFP_FS is not present, then we may be being called from
+		 * inside the fs writeback layer, so we MUST NOT fail.
+		 */
+		if ((gfp_mask & __GFP_FS) == 0)
+			gfp_mask |= __GFP_NOFAIL;
 		new_transaction = kmem_cache_zalloc(transaction_cache,
 						    gfp_mask);
-		if (!new_transaction) {
-			/*
-			 * If __GFP_FS is not present, then we may be
-			 * being called from inside the fs writeback
-			 * layer, so we MUST NOT fail.  Since
-			 * __GFP_NOFAIL is going away, we will arrange
-			 * to retry the allocation ourselves.
-			 */
-			if ((gfp_mask & __GFP_FS) == 0) {
-				congestion_wait(BLK_RW_ASYNC, HZ/50);
-				goto alloc_transaction;
-			}
+		if (!new_transaction)
 			return -ENOMEM;
-		}
 	}
 
 	jbd_debug(3, "New handle %p going live.\n", handle);
@@ -761,6 +755,30 @@ static void warn_dirty_buffer(struct buffer_head *bh)
 	       bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
 }
 
+/* Call t_frozen trigger and copy buffer data into jh->b_frozen_data. */
+static void jbd2_freeze_jh_data(struct journal_head *jh)
+{
+	struct page *page;
+	int offset;
+	char *source;
+	struct buffer_head *bh = jh2bh(jh);
+
+	J_EXPECT_JH(jh, buffer_uptodate(bh), "Possible IO failure.\n");
+	page = bh->b_page;
+	offset = offset_in_page(bh->b_data);
+	source = kmap_atomic(page);
+	/* Fire data frozen trigger just before we copy the data */
+	jbd2_buffer_frozen_trigger(jh, source + offset, jh->b_triggers);
+	memcpy(jh->b_frozen_data, source + offset, bh->b_size);
+	kunmap_atomic(source);
+
+	/*
+	 * Now that the frozen data is saved off, we need to store any matching
+	 * triggers.
+	 */
+	jh->b_frozen_triggers = jh->b_triggers;
+}
+
 /*
  * If the buffer is already part of the current transaction, then there
  * is nothing we need to do.  If it is already part of a prior
@@ -780,7 +798,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
 	journal_t *journal;
 	int error;
 	char *frozen_buffer = NULL;
-	int need_copy = 0;
 	unsigned long start_lock, time_lock;
 
 	if (is_handle_aborted(handle))
@@ -866,6 +883,26 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
 	 */
 	jh->b_modified = 0;
 
+	/*
+	 * If the buffer is not journaled right now, we need to make sure it
+	 * doesn't get written to disk before the caller actually commits the
+	 * new data
+	 */
+	if (!jh->b_transaction) {
+		JBUFFER_TRACE(jh, "no transaction");
+		J_ASSERT_JH(jh, !jh->b_next_transaction);
+		JBUFFER_TRACE(jh, "file as BJ_Reserved");
+		/*
+		 * Make sure all stores to jh (b_modified, b_frozen_data) are
+		 * visible before attaching it to the running transaction.
+		 * Paired with barrier in jbd2_write_access_granted()
+		 */
+		smp_wmb();
+		spin_lock(&journal->j_list_lock);
+		__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
+		spin_unlock(&journal->j_list_lock);
+		goto done;
+	}
 	/*
 	 * If there is already a copy-out version of this buffer, then we don't
 	 * need to make another one
@@ -873,113 +910,70 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
 	if (jh->b_frozen_data) {
 		JBUFFER_TRACE(jh, "has frozen data");
 		J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
-		jh->b_next_transaction = transaction;
-		goto done;
-	}
-
-	/* Is there data here we need to preserve? */
-
-	if (jh->b_transaction && jh->b_transaction != transaction) {
-		JBUFFER_TRACE(jh, "owned by older transaction");
-		J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
-		J_ASSERT_JH(jh, jh->b_transaction ==
-					journal->j_committing_transaction);
-
-		/* There is one case we have to be very careful about.
-		 * If the committing transaction is currently writing
-		 * this buffer out to disk and has NOT made a copy-out,
-		 * then we cannot modify the buffer contents at all
-		 * right now.  The essence of copy-out is that it is the
-		 * extra copy, not the primary copy, which gets
-		 * journaled.  If the primary copy is already going to
-		 * disk then we cannot do copy-out here. */
-
-		if (buffer_shadow(bh)) {
-			JBUFFER_TRACE(jh, "on shadow: sleep");
-			jbd_unlock_bh_state(bh);
-			wait_on_bit_io(&bh->b_state, BH_Shadow,
-				       TASK_UNINTERRUPTIBLE);
-			goto repeat;
-		}
-
-		/*
-		 * Only do the copy if the currently-owning transaction still
-		 * needs it. If buffer isn't on BJ_Metadata list, the
-		 * committing transaction is past that stage (here we use the
-		 * fact that BH_Shadow is set under bh_state lock together with
-		 * refiling to BJ_Shadow list and at this point we know the
-		 * buffer doesn't have BH_Shadow set).
-		 *
-		 * Subtle point, though: if this is a get_undo_access,
-		 * then we will be relying on the frozen_data to contain
-		 * the new value of the committed_data record after the
-		 * transaction, so we HAVE to force the frozen_data copy
-		 * in that case.
-		 */
-		if (jh->b_jlist == BJ_Metadata || force_copy) {
-			JBUFFER_TRACE(jh, "generate frozen data");
-			if (!frozen_buffer) {
-				JBUFFER_TRACE(jh, "allocate memory for buffer");
-				jbd_unlock_bh_state(bh);
-				frozen_buffer =
-					jbd2_alloc(jh2bh(jh)->b_size,
-						   GFP_NOFS);
-				if (!frozen_buffer) {
-					printk(KERN_ERR
-					       "%s: OOM for frozen_buffer\n",
-					       __func__);
-					JBUFFER_TRACE(jh, "oom!");
-					error = -ENOMEM;
-					jbd_lock_bh_state(bh);
-					goto done;
-				}
-				goto repeat;
-			}
-			jh->b_frozen_data = frozen_buffer;
-			frozen_buffer = NULL;
-			need_copy = 1;
-		}
-		jh->b_next_transaction = transaction;
+		jh->b_next_transaction = transaction;
+		goto attach_next;
 	}
 
+	JBUFFER_TRACE(jh, "owned by older transaction");
+	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
+	J_ASSERT_JH(jh, jh->b_transaction == journal->j_committing_transaction);
 
 	/*
-	 * Finally, if the buffer is not journaled right now, we need to make
-	 * sure it doesn't get written to disk before the caller actually
-	 * commits the new data
+	 * There is one case we have to be very careful about.  If the
+	 * committing transaction is currently writing this buffer out to disk
+	 * and has NOT made a copy-out, then we cannot modify the buffer
+	 * contents at all right now.  The essence of copy-out is that it is
+	 * the extra copy, not the primary copy, which gets journaled.  If the
+	 * primary copy is already going to disk then we cannot do copy-out
+	 * here.
 	 */
-	if (!jh->b_transaction) {
-		JBUFFER_TRACE(jh, "no transaction");
-		J_ASSERT_JH(jh, !jh->b_next_transaction);
-		JBUFFER_TRACE(jh, "file as BJ_Reserved");
-		spin_lock(&journal->j_list_lock);
-		__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
-		spin_unlock(&journal->j_list_lock);
+	if (buffer_shadow(bh)) {
+		JBUFFER_TRACE(jh, "on shadow: sleep");
+		jbd_unlock_bh_state(bh);
+		wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE);
+		goto repeat;
 	}
 
+	/*
+	 * Only do the copy if the currently-owning transaction still needs it.
+	 * If buffer isn't on BJ_Metadata list, the committing transaction is
+	 * past that stage (here we use the fact that BH_Shadow is set under
+	 * bh_state lock together with refiling to BJ_Shadow list and at this
+	 * point we know the buffer doesn't have BH_Shadow set).
+	 *
+	 * Subtle point, though: if this is a get_undo_access, then we will be
+	 * relying on the frozen_data to contain the new value of the
+	 * committed_data record after the transaction, so we HAVE to force the
+	 * frozen_data copy in that case.
+	 */
+	if (jh->b_jlist == BJ_Metadata || force_copy) {
+		JBUFFER_TRACE(jh, "generate frozen data");
+		if (!frozen_buffer) {
+			JBUFFER_TRACE(jh, "allocate memory for buffer");
+			jbd_unlock_bh_state(bh);
+			frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
+			if (!frozen_buffer) {
+				printk(KERN_ERR "%s: OOM for frozen_buffer\n",
+				       __func__);
+				JBUFFER_TRACE(jh, "oom!");
+				error = -ENOMEM;
+				goto out;
+			}
+			goto repeat;
+		}
+		jh->b_frozen_data = frozen_buffer;
+		frozen_buffer = NULL;
+		jbd2_freeze_jh_data(jh);
+	}
+attach_next:
+	/*
+	 * Make sure all stores to jh (b_modified, b_frozen_data) are visible
+	 * before attaching it to the running transaction. Paired with barrier
+	 * in jbd2_write_access_granted()
+	 */
+	smp_wmb();
+	jh->b_next_transaction = transaction;
+
 done:
-	if (need_copy) {
-		struct page *page;
-		int offset;
-		char *source;
-
-		J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)),
-			    "Possible IO failure.\n");
-		page = jh2bh(jh)->b_page;
-		offset = offset_in_page(jh2bh(jh)->b_data);
-		source = kmap_atomic(page);
-		/* Fire data frozen trigger just before we copy the data */
-		jbd2_buffer_frozen_trigger(jh, source + offset,
-					   jh->b_triggers);
-		memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
-		kunmap_atomic(source);
-
-		/*
-		 * Now that the frozen data is saved off, we need to store
-		 * any matching triggers.
-		 */
-		jh->b_frozen_triggers = jh->b_triggers;
-	}
 	jbd_unlock_bh_state(bh);
 
 	/*
@@ -996,6 +990,55 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
 	return error;
 }
 
+/* Fast check whether buffer is already attached to the required transaction */
+static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh)
+{
+	struct journal_head *jh;
+	bool ret = false;
+
+	/* Dirty buffers require special handling... */
+	if (buffer_dirty(bh))
+		return false;
+
+	/*
+	 * RCU protects us from dereferencing freed pages. So the checks we do
+	 * are guaranteed not to oops. However the jh slab object can get freed
+	 * & reallocated while we work with it. So we have to be careful. When
+	 * we see jh attached to the running transaction, we know it must stay
+	 * so until the transaction is committed. Thus jh won't be freed and
+	 * will be attached to the same bh while we run. However it can
+	 * happen jh gets freed, reallocated, and attached to the transaction
+	 * just after we get pointer to it from bh. So we have to be careful
+	 * and recheck jh still belongs to our bh before we return success.
+	 */
+	rcu_read_lock();
+	if (!buffer_jbd(bh))
+		goto out;
+	/* This should be bh2jh() but that doesn't work with inline functions */
+	jh = READ_ONCE(bh->b_private);
+	if (!jh)
+		goto out;
+	if (jh->b_transaction != handle->h_transaction &&
+	    jh->b_next_transaction != handle->h_transaction)
+		goto out;
+	/*
+	 * There are two reasons for the barrier here:
+	 * 1) Make sure to fetch b_bh after we did previous checks so that we
+	 * detect when jh went through free, realloc, attach to transaction
+	 * while we were checking. Paired with implicit barrier in that path.
+	 * 2) So that access to bh done after jbd2_write_access_granted()
+	 * doesn't get reordered and see inconsistent state of concurrent
+	 * do_get_write_access().
+	 */
+	smp_mb();
+	if (unlikely(jh->b_bh != bh))
+		goto out;
+	ret = true;
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
 /**
  * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
  * @handle: transaction to add buffer modifications to
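jbd2_write_access_granted() is the new lockless fast path: inspect the journal_head under rcu_read_lock(), and because SLAB_DESTROY_BY_RCU (added to the cache above) only guarantees the memory stays a journal_head, re-verify jh->b_bh after a full barrier in case the object was freed and recycled. The smp_wmb() added in do_get_write_access() is the other half of the pairing: the writer publishes b_modified/b_frozen_data before attaching the jh this reader checks. A compilable sketch of the check/recheck shape, with C11 atomics standing in for the kernel primitives; in real concurrent use the dereference would still need an RCU-like grace period to be safe (all names illustrative):

    #include <stdatomic.h>
    #include <stdbool.h>

    struct txn { int id; };
    struct bh;

    struct jh {
        struct bh *b_bh;            /* back-pointer to the owner */
        struct txn *b_transaction;
    };

    struct bh {
        _Atomic(struct jh *) b_private;
    };

    static bool write_access_granted(struct bh *bh, struct txn *running)
    {
        struct jh *jh = atomic_load_explicit(&bh->b_private,
                                             memory_order_acquire);

        if (!jh || jh->b_transaction != running)
            return false;
        /* Order the recheck after the checks above: the jh may have been
         * freed, reallocated and attached to a different bh meanwhile. */
        atomic_thread_fence(memory_order_seq_cst);
        return jh->b_bh == bh;      /* still ours? the fast path wins */
    }

    int main(void)
    {
        static struct txn t;
        static struct bh bh;
        static struct jh jh = { .b_bh = &bh, .b_transaction = &t };

        atomic_store(&bh.b_private, &jh);
        return write_access_granted(&bh, &t) ? 0 : 1;
    }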
@@ -1009,9 +1052,13 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
 
 int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
 {
-	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
+	struct journal_head *jh;
 	int rc;
 
+	if (jbd2_write_access_granted(handle, bh))
+		return 0;
+
+	jh = jbd2_journal_add_journal_head(bh);
 	/* We do not want to get caught playing with fields which the
 	 * log thread also manipulates.  Make sure that the buffer
 	 * completes any outstanding IO before proceeding. */
@@ -1141,11 +1188,14 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
 int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
 {
 	int err;
-	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
+	struct journal_head *jh;
 	char *committed_data = NULL;
 
 	JBUFFER_TRACE(jh, "entry");
+	if (jbd2_write_access_granted(handle, bh))
+		return 0;
 
+	jh = jbd2_journal_add_journal_head(bh);
 	/*
 	 * Do this first --- it can drop the journal lock, so we want to
 	 * make sure that obtaining the committed_data is done
@@ -1230,8 +1280,6 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh,
 	triggers->t_abort(triggers, jh2bh(jh));
 }
 
-
-
 /**
  * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
  * @handle: transaction to add buffer to.
@@ -1264,12 +1312,36 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 
 	if (is_handle_aborted(handle))
 		return -EROFS;
-	journal = transaction->t_journal;
-	jh = jbd2_journal_grab_journal_head(bh);
-	if (!jh) {
+	if (!buffer_jbd(bh)) {
 		ret = -EUCLEAN;
 		goto out;
 	}
+	/*
+	 * We don't grab jh reference here since the buffer must be part
+	 * of the running transaction.
+	 */
+	jh = bh2jh(bh);
+	J_ASSERT_JH(jh, jh->b_transaction == transaction ||
+		    jh->b_next_transaction == transaction);
+	if (jh->b_modified == 1) {
+		/*
+		 * If it's in our transaction it must be in BJ_Metadata list.
+		 * The assertion is unreliable since we may see jh in
+		 * inconsistent state unless we grab bh_state lock. But this
+		 * is crutial to catch bugs so let's do a reliable check until
+		 * the lockless handling is fully proven.
+		 */
+		if (jh->b_transaction == transaction &&
+		    jh->b_jlist != BJ_Metadata) {
+			jbd_lock_bh_state(bh);
+			J_ASSERT_JH(jh, jh->b_transaction != transaction ||
+					jh->b_jlist == BJ_Metadata);
+			jbd_unlock_bh_state(bh);
+		}
+		goto out;
+	}
+
+	journal = transaction->t_journal;
 	jbd_debug(5, "journal_head %p\n", jh);
 	JBUFFER_TRACE(jh, "entry");
 
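The rewritten entry check is what makes jbd2_journal_dirty_metadata() cheap for buffers dirtied more than once per transaction (the allocation-bitmap case this series targets): when b_modified is already 1, the buffer is known to be filed and the function returns without taking bh_state or j_list locks. A small runnable analogue of the "flag says the work is done, skip the lock" shape; the structure is illustrative, not the kernel's actual locking rules:

    #include <pthread.h>
    #include <stdatomic.h>

    struct buf {
        atomic_int modified;        /* like jh->b_modified */
        pthread_mutex_t lock;
    };

    static void dirty_metadata(struct buf *b)
    {
        if (atomic_load(&b->modified))  /* fast path: already filed */
            return;

        pthread_mutex_lock(&b->lock);
        if (!atomic_exchange(&b->modified, 1)) {
            /* first time in this transaction: file the buffer */
        }
        pthread_mutex_unlock(&b->lock);
    }

    int main(void)
    {
        struct buf b = { .lock = PTHREAD_MUTEX_INITIALIZER };

        dirty_metadata(&b);  /* slow path: takes the lock once */
        dirty_metadata(&b);  /* fast path: no locking at all */
        return 0;
    }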
@@ -1360,7 +1432,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 	spin_unlock(&journal->j_list_lock);
 out_unlock_bh:
 	jbd_unlock_bh_state(bh);
-	jbd2_journal_put_journal_head(jh);
 out:
 	JBUFFER_TRACE(jh, "exit");
 	return ret;
include/linux/jbd2.h

@@ -1035,7 +1035,7 @@ struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal);
 int jbd2_journal_next_log_block(journal_t *, unsigned long long *);
 int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
 			      unsigned long *block);
-void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
+int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
 void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
 
 /* Commit management */
@@ -1157,7 +1157,7 @@ extern int jbd2_journal_recover (journal_t *journal);
 extern int	   jbd2_journal_wipe       (journal_t *, int);
 extern int	   jbd2_journal_skip_recovery	(journal_t *);
 extern void	   jbd2_journal_update_sb_errno(journal_t *);
-extern void	   jbd2_journal_update_sb_log_tail	(journal_t *, tid_t,
+extern int	   jbd2_journal_update_sb_log_tail	(journal_t *, tid_t,
 				unsigned long, int);
 extern void	   __jbd2_journal_abort_hard	(journal_t *);
 extern void	   jbd2_journal_abort      (journal_t *, int);
include/trace/events/ext4.h

@@ -1185,15 +1185,14 @@ TRACE_EVENT(ext4_da_update_reserve_space,
 );
 
 TRACE_EVENT(ext4_da_reserve_space,
-	TP_PROTO(struct inode *inode, int md_needed),
+	TP_PROTO(struct inode *inode),
 
-	TP_ARGS(inode, md_needed),
+	TP_ARGS(inode),
 
 	TP_STRUCT__entry(
 		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	__u64,	i_blocks		)
-		__field(	int,	md_needed		)
 		__field(	int,	reserved_data_blocks	)
 		__field(	int,	reserved_meta_blocks	)
 		__field(	__u16,	mode			)
@@ -1203,18 +1202,17 @@ TRACE_EVENT(ext4_da_reserve_space,
 		__entry->dev = inode->i_sb->s_dev;
 		__entry->ino = inode->i_ino;
 		__entry->i_blocks = inode->i_blocks;
-		__entry->md_needed = md_needed;
 		__entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
 		__entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
 		__entry->mode = inode->i_mode;
 	),
 
-	TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu md_needed %d "
+	TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu "
 		  "reserved_data_blocks %d reserved_meta_blocks %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->mode, __entry->i_blocks,
-		  __entry->md_needed, __entry->reserved_data_blocks,
+		  __entry->reserved_data_blocks,
 		  __entry->reserved_meta_blocks)
 );
 
@@ -2478,6 +2476,31 @@ TRACE_EVENT(ext4_collapse_range,
 		  __entry->offset, __entry->len)
 );
 
+TRACE_EVENT(ext4_insert_range,
+	TP_PROTO(struct inode *inode, loff_t offset, loff_t len),
+
+	TP_ARGS(inode, offset, len),
+
+	TP_STRUCT__entry(
+		__field(dev_t,	dev)
+		__field(ino_t,	ino)
+		__field(loff_t,	offset)
+		__field(loff_t,	len)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->offset	= offset;
+		__entry->len	= len;
+	),
+
+	TP_printk("dev %d,%d ino %lu offset %lld len %lld",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  __entry->offset, __entry->len)
+);
+
 TRACE_EVENT(ext4_es_shrink,
 	TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time,
 		 int nr_skipped, int retried),
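The new event becomes callable as trace_ext4_insert_range(inode, offset, len) (the call site wrapper is generated by TRACE_EVENT), and a trace-buffer line renders per the TP_printk() format above. A small sketch that just prints the same shape with made-up values:

    #include <stdio.h>

    int main(void)
    {
        int major = 8, minor = 1;
        unsigned long ino = 12;
        long long offset = 4096, len = 8192;

        printf("ext4_insert_range: dev %d,%d ino %lu offset %lld len %lld\n",
               major, minor, ino, offset, len);
        return 0;
    }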