nfsd: Replace use of rwsem with errseq_t

commit 555dbf1a9aac6d3150c8b52fa35f768a692f4eeb upstream.

The nf_rwsem in struct nfsd_file is currently used to separate file
write and commit operations, ensuring that we catch errors and apply
them to the correct write or commit.
We can improve scalability, at the expense of a little accuracy (some
extra false positives), by replacing the nf_rwsem with more careful
use of the errseq_t mechanism to track errors across the different
operations.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
[ cel: rebased on zero-verifier fix ]
Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
Trond Myklebust 2021-12-18 20:38:01 -05:00 committed by Greg Kroah-Hartman
parent 56a7f57da5
commit f14816f2f9
4 changed files with 19 additions and 30 deletions

View File

@ -194,7 +194,6 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
} }
nf->nf_mark = NULL; nf->nf_mark = NULL;
init_rwsem(&nf->nf_rwsem);
trace_nfsd_file_alloc(nf); trace_nfsd_file_alloc(nf);
} }
return nf; return nf;

View File

@ -46,7 +46,6 @@ struct nfsd_file {
refcount_t nf_ref; refcount_t nf_ref;
unsigned char nf_may; unsigned char nf_may;
struct nfsd_file_mark *nf_mark; struct nfsd_file_mark *nf_mark;
struct rw_semaphore nf_rwsem;
}; };
int nfsd_file_cache_init(void); int nfsd_file_cache_init(void);

View File

@ -1380,6 +1380,8 @@ static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)
static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
{ {
struct file *dst = copy->nf_dst->nf_file;
struct file *src = copy->nf_src->nf_file;
ssize_t bytes_copied = 0; ssize_t bytes_copied = 0;
size_t bytes_total = copy->cp_count; size_t bytes_total = copy->cp_count;
u64 src_pos = copy->cp_src_pos; u64 src_pos = copy->cp_src_pos;
@ -1388,9 +1390,8 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
do { do {
if (kthread_should_stop()) if (kthread_should_stop())
break; break;
bytes_copied = nfsd_copy_file_range(copy->nf_src->nf_file, bytes_copied = nfsd_copy_file_range(src, src_pos, dst, dst_pos,
src_pos, copy->nf_dst->nf_file, dst_pos, bytes_total);
bytes_total);
if (bytes_copied <= 0) if (bytes_copied <= 0)
break; break;
bytes_total -= bytes_copied; bytes_total -= bytes_copied;

View File

@ -535,10 +535,11 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
{ {
struct file *src = nf_src->nf_file; struct file *src = nf_src->nf_file;
struct file *dst = nf_dst->nf_file; struct file *dst = nf_dst->nf_file;
errseq_t since;
loff_t cloned; loff_t cloned;
__be32 ret = 0; __be32 ret = 0;
down_write(&nf_dst->nf_rwsem); since = READ_ONCE(dst->f_wb_err);
cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0); cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
if (cloned < 0) { if (cloned < 0) {
ret = nfserrno(cloned); ret = nfserrno(cloned);
@ -552,6 +553,8 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
loff_t dst_end = count ? dst_pos + count - 1 : LLONG_MAX; loff_t dst_end = count ? dst_pos + count - 1 : LLONG_MAX;
int status = vfs_fsync_range(dst, dst_pos, dst_end, 0); int status = vfs_fsync_range(dst, dst_pos, dst_end, 0);
if (!status)
status = filemap_check_wb_err(dst->f_mapping, since);
if (!status) if (!status)
status = commit_inode_metadata(file_inode(src)); status = commit_inode_metadata(file_inode(src));
if (status < 0) { if (status < 0) {
@ -561,7 +564,6 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
} }
} }
out_err: out_err:
up_write(&nf_dst->nf_rwsem);
return ret; return ret;
} }
@ -980,6 +982,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
struct file *file = nf->nf_file; struct file *file = nf->nf_file;
struct svc_export *exp; struct svc_export *exp;
struct iov_iter iter; struct iov_iter iter;
errseq_t since;
__be32 nfserr; __be32 nfserr;
int host_err; int host_err;
int use_wgather; int use_wgather;
@ -1009,21 +1012,18 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
flags |= RWF_SYNC; flags |= RWF_SYNC;
iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt); iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
since = READ_ONCE(file->f_wb_err);
if (flags & RWF_SYNC) { if (flags & RWF_SYNC) {
down_write(&nf->nf_rwsem);
host_err = vfs_iter_write(file, &iter, &pos, flags); host_err = vfs_iter_write(file, &iter, &pos, flags);
if (host_err < 0) if (host_err < 0)
nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
nfsd_net_id)); nfsd_net_id));
up_write(&nf->nf_rwsem);
} else { } else {
down_read(&nf->nf_rwsem);
if (verf) if (verf)
nfsd_copy_boot_verifier(verf, nfsd_copy_boot_verifier(verf,
net_generic(SVC_NET(rqstp), net_generic(SVC_NET(rqstp),
nfsd_net_id)); nfsd_net_id));
host_err = vfs_iter_write(file, &iter, &pos, flags); host_err = vfs_iter_write(file, &iter, &pos, flags);
up_read(&nf->nf_rwsem);
} }
if (host_err < 0) { if (host_err < 0) {
nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
@ -1033,6 +1033,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
*cnt = host_err; *cnt = host_err;
nfsdstats.io_write += *cnt; nfsdstats.io_write += *cnt;
fsnotify_modify(file); fsnotify_modify(file);
host_err = filemap_check_wb_err(file->f_mapping, since);
if (host_err < 0)
goto out_nfserr;
if (stable && use_wgather) { if (stable && use_wgather) {
host_err = wait_for_concurrent_writes(file); host_err = wait_for_concurrent_writes(file);
@ -1113,19 +1116,6 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
} }
#ifdef CONFIG_NFSD_V3 #ifdef CONFIG_NFSD_V3
static int
nfsd_filemap_write_and_wait_range(struct nfsd_file *nf, loff_t offset,
loff_t end)
{
struct address_space *mapping = nf->nf_file->f_mapping;
int ret = filemap_fdatawrite_range(mapping, offset, end);
if (ret)
return ret;
filemap_fdatawait_range_keep_errors(mapping, offset, end);
return 0;
}
/* /*
* Commit all pending writes to stable storage. * Commit all pending writes to stable storage.
* *
@ -1156,25 +1146,25 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (err) if (err)
goto out; goto out;
if (EX_ISSYNC(fhp->fh_export)) { if (EX_ISSYNC(fhp->fh_export)) {
int err2 = nfsd_filemap_write_and_wait_range(nf, offset, end); errseq_t since = READ_ONCE(nf->nf_file->f_wb_err);
int err2;
down_write(&nf->nf_rwsem); err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
if (!err2)
err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
switch (err2) { switch (err2) {
case 0: case 0:
nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
nfsd_net_id)); nfsd_net_id));
err2 = filemap_check_wb_err(nf->nf_file->f_mapping,
since);
break; break;
case -EINVAL: case -EINVAL:
err = nfserr_notsupp; err = nfserr_notsupp;
break; break;
default: default:
err = nfserrno(err2);
nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_reset_boot_verifier(net_generic(nf->nf_net,
nfsd_net_id)); nfsd_net_id));
} }
up_write(&nf->nf_rwsem); err = nfserrno(err2);
} else } else
nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
nfsd_net_id)); nfsd_net_id));