diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 0796c45d0d4d..01bfe7662751 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -144,6 +144,9 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout) timeout); if (ret < 0) return -ERESTARTSYS; + /* Reset the lock status after a server reboot so we resend */ + if (block->b_status == nlm_lck_denied_grace_period) + block->b_status = nlm_lck_blocked; req->a_res.status = block->b_status; return 0; } diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 7e529c3c45c0..9760ecb9b60f 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -550,9 +550,6 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT); if (status < 0) break; - /* Resend the blocking lock request after a server reboot */ - if (resp->status == nlm_lck_denied_grace_period) - continue; if (resp->status != nlm_lck_blocked) break; } diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c index 737d839bc17b..6fc7b5cae92b 100644 --- a/fs/nfs/blocklayout/blocklayoutdm.c +++ b/fs/nfs/blocklayout/blocklayoutdm.c @@ -55,7 +55,8 @@ static void dev_remove(struct net *net, dev_t dev) bl_pipe_msg.bl_wq = &nn->bl_wq; memset(msg, 0, sizeof(*msg)); - msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS); + msg->len = sizeof(bl_msg) + bl_msg.totallen; + msg->data = kzalloc(msg->len, GFP_NOFS); if (!msg->data) goto out; @@ -66,7 +67,6 @@ static void dev_remove(struct net *net, dev_t dev) memcpy(msg->data, &bl_msg, sizeof(bl_msg)); dataptr = (uint8_t *) msg->data; memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request)); - msg->len = sizeof(bl_msg) + bl_msg.totallen; add_wait_queue(&nn->bl_wq, &wq); if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) { diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 5088b57b078a..cff089a412c7 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -125,6 
+125,9 @@ nfs41_callback_svc(void *vrqstp) set_freezable(); while (!kthread_should_stop()) { + if (try_to_freeze()) + continue; + prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE); spin_lock_bh(&serv->sv_cb_lock); if (!list_empty(&serv->sv_cb_list)) { diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 2960512792c2..a13d26ede254 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -500,7 +500,7 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy, &args->craa_type_mask)) pnfs_recall_all_layouts(cps->clp); if (flags) - nfs_expire_all_delegation_types(cps->clp, flags); + nfs_expire_unused_delegation_types(cps->clp, flags); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); return status; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 84d8eae203a7..c513b0cc835f 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -593,6 +593,8 @@ int nfs_create_rpc_client(struct nfs_client *clp, args.flags |= RPC_CLNT_CREATE_DISCRTRY; if (test_bit(NFS_CS_NORESVPORT, &clp->cl_flags)) args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; + if (test_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags)) + args.flags |= RPC_CLNT_CREATE_INFINITE_SLOTS; if (!IS_ERR(clp->cl_rpcclient)) return 0; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 6390a4b5fee7..57db3244f4d9 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -64,17 +64,15 @@ int nfs4_have_delegation(struct inode *inode, fmode_t flags) return ret; } -static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state) +static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) { struct inode *inode = state->inode; struct file_lock *fl; int status = 0; - if (inode->i_flock == NULL) - return 0; - if (inode->i_flock == NULL) goto out; + /* Protect inode->i_flock using the file locks lock */ lock_flocks(); for (fl = inode->i_flock; fl != NULL; fl = 
fl->fl_next) { @@ -83,7 +81,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ if (nfs_file_open_context(fl->fl_file) != ctx) continue; unlock_flocks(); - status = nfs4_lock_delegation_recall(state, fl); + status = nfs4_lock_delegation_recall(fl, state, stateid); if (status < 0) goto out; lock_flocks(); @@ -120,7 +118,7 @@ static int nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *s seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); err = nfs4_open_delegation_recall(ctx, state, stateid); if (!err) - err = nfs_delegation_claim_locks(ctx, state); + err = nfs_delegation_claim_locks(ctx, state, stateid); if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) err = -EAGAIN; mutex_unlock(&sp->so_delegreturn_mutex); @@ -389,6 +387,24 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation return err; } +static bool nfs_delegation_need_return(struct nfs_delegation *delegation) +{ + bool ret = false; + + if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags)) + ret = true; + if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) { + struct inode *inode; + + spin_lock(&delegation->lock); + inode = delegation->inode; + if (inode && list_empty(&NFS_I(inode)->open_files)) + ret = true; + spin_unlock(&delegation->lock); + } + return ret; +} + /** * nfs_client_return_marked_delegations - return previously marked delegations * @clp: nfs_client to process @@ -411,8 +427,7 @@ int nfs_client_return_marked_delegations(struct nfs_client *clp) list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry_rcu(delegation, &server->delegations, super_list) { - if (!test_and_clear_bit(NFS_DELEGATION_RETURN, - &delegation->flags)) + if (!nfs_delegation_need_return(delegation)) continue; inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) @@ -471,6 +486,13 @@ int nfs4_inode_return_delegation(struct inode *inode) return 
err; } +static void nfs_mark_return_if_closed_delegation(struct nfs_server *server, + struct nfs_delegation *delegation) +{ + set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags); + set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); +} + static void nfs_mark_return_delegation(struct nfs_server *server, struct nfs_delegation *delegation) { @@ -478,6 +500,45 @@ static void nfs_mark_return_delegation(struct nfs_server *server, set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); } +static bool nfs_server_mark_return_all_delegations(struct nfs_server *server) +{ + struct nfs_delegation *delegation; + bool ret = false; + + list_for_each_entry_rcu(delegation, &server->delegations, super_list) { + nfs_mark_return_delegation(server, delegation); + ret = true; + } + return ret; +} + +static void nfs_client_mark_return_all_delegations(struct nfs_client *clp) +{ + struct nfs_server *server; + + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) + nfs_server_mark_return_all_delegations(server); + rcu_read_unlock(); +} + +static void nfs_delegation_run_state_manager(struct nfs_client *clp) +{ + if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) + nfs4_schedule_state_manager(clp); +} + +/** + * nfs_expire_all_delegations + * @clp: client to process + * + */ +void nfs_expire_all_delegations(struct nfs_client *clp) +{ + nfs_client_mark_return_all_delegations(clp); + nfs_delegation_run_state_manager(clp); +} + /** * nfs_super_return_all_delegations - return delegations for one superblock * @sb: sb to process @@ -486,24 +547,22 @@ static void nfs_mark_return_delegation(struct nfs_server *server, void nfs_server_return_all_delegations(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; - struct nfs_delegation *delegation; + bool need_wait; if (clp == NULL) return; rcu_read_lock(); - list_for_each_entry_rcu(delegation, &server->delegations, super_list) { - spin_lock(&delegation->lock); - 
set_bit(NFS_DELEGATION_RETURN, &delegation->flags); - spin_unlock(&delegation->lock); - } + need_wait = nfs_server_mark_return_all_delegations(server); rcu_read_unlock(); - if (nfs_client_return_marked_delegations(clp) != 0) + if (need_wait) { nfs4_schedule_state_manager(clp); + nfs4_wait_clnt_recover(clp); + } } -static void nfs_mark_return_all_delegation_types(struct nfs_server *server, +static void nfs_mark_return_unused_delegation_types(struct nfs_server *server, fmode_t flags) { struct nfs_delegation *delegation; @@ -512,27 +571,21 @@ static void nfs_mark_return_all_delegation_types(struct nfs_server *server, if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE)) continue; if (delegation->type & flags) - nfs_mark_return_delegation(server, delegation); + nfs_mark_return_if_closed_delegation(server, delegation); } } -static void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp, +static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *clp, fmode_t flags) { struct nfs_server *server; rcu_read_lock(); list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) - nfs_mark_return_all_delegation_types(server, flags); + nfs_mark_return_unused_delegation_types(server, flags); rcu_read_unlock(); } -static void nfs_delegation_run_state_manager(struct nfs_client *clp) -{ - if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) - nfs4_schedule_state_manager(clp); -} - void nfs_remove_bad_delegation(struct inode *inode) { struct nfs_delegation *delegation; @@ -546,27 +599,17 @@ void nfs_remove_bad_delegation(struct inode *inode) EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation); /** - * nfs_expire_all_delegation_types + * nfs_expire_unused_delegation_types * @clp: client to process * @flags: delegation types to expire * */ -void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags) +void nfs_expire_unused_delegation_types(struct nfs_client *clp, fmode_t flags) { - 
nfs_client_mark_return_all_delegation_types(clp, flags); + nfs_client_mark_return_unused_delegation_types(clp, flags); nfs_delegation_run_state_manager(clp); } -/** - * nfs_expire_all_delegations - * @clp: client to process - * - */ -void nfs_expire_all_delegations(struct nfs_client *clp) -{ - nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE); -} - static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server) { struct nfs_delegation *delegation; @@ -574,7 +617,7 @@ static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server) list_for_each_entry_rcu(delegation, &server->delegations, super_list) { if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags)) continue; - nfs_mark_return_delegation(server, delegation); + nfs_mark_return_if_closed_delegation(server, delegation); } } diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index d54d4fca6793..9a79c7a99d6d 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -28,6 +28,7 @@ struct nfs_delegation { enum { NFS_DELEGATION_NEED_RECLAIM = 0, NFS_DELEGATION_RETURN, + NFS_DELEGATION_RETURN_IF_CLOSED, NFS_DELEGATION_REFERENCED, NFS_DELEGATION_RETURNING, }; @@ -41,7 +42,7 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode); struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); void nfs_server_return_all_delegations(struct nfs_server *); void nfs_expire_all_delegations(struct nfs_client *clp); -void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags); +void nfs_expire_unused_delegation_types(struct nfs_client *clp, fmode_t flags); void nfs_expire_unreferenced_delegations(struct nfs_client *clp); int nfs_client_return_marked_delegations(struct nfs_client *clp); int nfs_delegations_present(struct nfs_client *clp); @@ -53,7 +54,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp); /* NFSv4 delegation-related procedures */ int nfs4_proc_delegreturn(struct inode *inode, 
struct rpc_cred *cred, const nfs4_stateid *stateid, int issync); int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); -int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); +int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid); bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags); void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f23f455be42b..e093e73178b7 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1486,6 +1486,8 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) goto no_open; if (d_mountpoint(dentry)) goto no_open; + if (NFS_SB(dentry->d_sb)->caps & NFS_CAP_ATOMIC_OPEN_V1) + goto no_open; inode = dentry->d_inode; parent = dget_parent(dentry); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 29f4a48a0ee6..a87a44f84113 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -744,6 +744,7 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) { struct inode *inode = filp->f_mapping->host; + struct nfs_lock_context *l_ctx; int status; /* @@ -752,6 +753,14 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) */ nfs_sync_mapping(filp->f_mapping); + l_ctx = nfs_get_lock_context(nfs_file_open_context(filp)); + if (!IS_ERR(l_ctx)) { + status = nfs_iocounter_wait(&l_ctx->io_count); + nfs_put_lock_context(l_ctx); + if (status < 0) + return status; + } + /* NOTE: special case * If we're signalled while cleaning up locks on process exit, we * still need to complete the unlock. 
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index dc0f98dfa717..c516da5873fd 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -726,9 +726,9 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, return ret; } -static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data) +static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data, size_t datalen) { - return key_instantiate_and_link(key, data, strlen(data) + 1, + return key_instantiate_and_link(key, data, datalen, id_resolver_cache->thread_keyring, authkey); } @@ -738,6 +738,7 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im, struct key *key, struct key *authkey) { char id_str[NFS_UINT_MAXLEN]; + size_t len; int ret = -ENOKEY; /* ret = -ENOKEY */ @@ -747,13 +748,15 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im, case IDMAP_CONV_NAMETOID: if (strcmp(upcall->im_name, im->im_name) != 0) break; - sprintf(id_str, "%d", im->im_id); - ret = nfs_idmap_instantiate(key, authkey, id_str); + /* Note: here we store the NUL terminator too */ + len = sprintf(id_str, "%d", im->im_id) + 1; + ret = nfs_idmap_instantiate(key, authkey, id_str, len); break; case IDMAP_CONV_IDTONAME: if (upcall->im_id != im->im_id) break; - ret = nfs_idmap_instantiate(key, authkey, im->im_name); + len = strlen(im->im_name); + ret = nfs_idmap_instantiate(key, authkey, im->im_name, len); break; default: ret = -EINVAL; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 1f941674b089..c1c7a9d78722 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -561,20 +561,22 @@ static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) l_ctx->lockowner.l_owner = current->files; l_ctx->lockowner.l_pid = current->tgid; INIT_LIST_HEAD(&l_ctx->list); + nfs_iocounter_init(&l_ctx->io_count); } static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx) { - struct nfs_lock_context *pos; + struct nfs_lock_context *head = &ctx->lock_context; + 
struct nfs_lock_context *pos = head; - list_for_each_entry(pos, &ctx->lock_context.list, list) { + do { if (pos->lockowner.l_owner != current->files) continue; if (pos->lockowner.l_pid != current->tgid) continue; atomic_inc(&pos->count); return pos; - } + } while ((pos = list_entry(pos->list.next, typeof(*pos), list)) != head); return NULL; } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 541c9ebdbc5a..91e59a39fc08 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -229,6 +229,13 @@ extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr, void (*release)(struct nfs_pgio_header *hdr)); void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); +int nfs_iocounter_wait(struct nfs_io_counter *c); + +static inline void nfs_iocounter_init(struct nfs_io_counter *c) +{ + c->flags = 0; + atomic_set(&c->io_count, 0); +} /* nfs2xdr.c */ extern struct rpc_procinfo nfs_procedures[]; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 944c9a5c1039..553a83cc4106 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -36,6 +36,7 @@ enum nfs4_client_state { struct nfs4_minor_version_ops { u32 minor_version; + unsigned init_caps; int (*call_sync)(struct rpc_clnt *clnt, struct nfs_server *server, @@ -143,12 +144,14 @@ struct nfs4_lock_state { enum { LK_STATE_IN_USE, NFS_DELEGATED_STATE, /* Current stateid is delegation */ + NFS_OPEN_STATE, /* OPEN stateid is set */ NFS_O_RDONLY_STATE, /* OPEN stateid has read-only state */ NFS_O_WRONLY_STATE, /* OPEN stateid has write-only state */ NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */ NFS_STATE_RECLAIM_REBOOT, /* OPEN stateid server rebooted */ NFS_STATE_RECLAIM_NOGRACE, /* OPEN stateid needs to recover state */ NFS_STATE_POSIX_LOCKS, /* Posix locks are supported */ + NFS_STATE_RECOVERY_FAILED, /* OPEN stateid state recovery failed */ }; struct nfs4_state { @@ -233,6 +236,10 @@ extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, struct 
qstr extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *); extern int nfs4_release_lockowner(struct nfs4_lock_state *); extern const struct xattr_handler *nfs4_xattr_handlers[]; +extern int nfs4_set_rw_stateid(nfs4_stateid *stateid, + const struct nfs_open_context *ctx, + const struct nfs_lock_context *l_ctx, + fmode_t fmode); #if defined(CONFIG_NFS_V4_1) static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) @@ -347,13 +354,13 @@ extern int nfs4_wait_clnt_recover(struct nfs_client *clp); extern int nfs4_client_recover_expired_lease(struct nfs_client *clp); extern void nfs4_schedule_state_manager(struct nfs_client *); extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp); -extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); +extern int nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); extern void nfs41_handle_server_scope(struct nfs_client *, struct nfs41_server_scope **); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); -extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *, +extern int nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *, fmode_t, const struct nfs_lockowner *); extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); @@ -412,6 +419,11 @@ static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_statei return memcmp(dst, src, sizeof(*dst)) == 0; } +static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state) +{ + return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0; +} + #else #define nfs4_close_state(a, b) do { } while (0) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index ac4fc9a8fdbc..f4d4d4ec6bf7 100644 --- 
a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -198,6 +198,8 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, /* Check NFS protocol revision and initialize RPC op vector */ clp->rpc_ops = &nfs_v4_clientops; + if (clp->cl_minorversion != 0) + __set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags); __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); error = nfs_create_rpc_client(clp, timeparms, authflavour); if (error < 0) @@ -717,6 +719,19 @@ static int nfs4_server_common_setup(struct nfs_server *server, if (error < 0) goto out; + /* Set the basic capabilities */ + server->caps |= server->nfs_client->cl_mvops->init_caps; + if (server->flags & NFS_MOUNT_NORDIRPLUS) + server->caps &= ~NFS_CAP_READDIRPLUS; + /* + * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower + * authentication. + */ + if (nfs4_disable_idmapping && + server->client->cl_auth->au_flavor == RPC_AUTH_UNIX) + server->caps |= NFS_CAP_UIDGID_NOMAP; + + /* Probe the root fh to retrieve its FSID and filehandle */ error = nfs4_get_rootfh(server, mntfh); if (error < 0) @@ -760,9 +775,6 @@ static int nfs4_init_server(struct nfs_server *server, /* Initialise the client representation from the mount data */ server->flags = data->flags; - server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|NFS_CAP_POSIX_LOCK; - if (!(data->flags & NFS_MOUNT_NORDIRPLUS)) - server->caps |= NFS_CAP_READDIRPLUS; server->options = data->options; /* Get a client record */ @@ -779,13 +791,6 @@ static int nfs4_init_server(struct nfs_server *server, if (error < 0) goto error; - /* - * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower - * authentication. 
- */ - if (nfs4_disable_idmapping && data->auth_flavors[0] == RPC_AUTH_UNIX) - server->caps |= NFS_CAP_UIDGID_NOMAP; - if (data->rsize) server->rsize = nfs_block_size(data->rsize, NULL); if (data->wsize) @@ -863,7 +868,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, /* Initialise the client representation from the parent server */ nfs_server_copy_userdata(server, parent_server); - server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; /* Get a client representation. * Note: NFSv4 always uses TCP, */ diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 49eeb044c109..22d10623f5ee 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -129,7 +129,6 @@ static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo) { if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) return; - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags); pnfs_return_layout(inode); } @@ -159,11 +158,14 @@ static int filelayout_async_handle_error(struct rpc_task *task, case -NFS4ERR_OPENMODE: if (state == NULL) break; - nfs4_schedule_stateid_recovery(mds_server, state); + if (nfs4_schedule_stateid_recovery(mds_server, state) < 0) + goto out_bad_stateid; goto wait_on_recovery; case -NFS4ERR_EXPIRED: - if (state != NULL) - nfs4_schedule_stateid_recovery(mds_server, state); + if (state != NULL) { + if (nfs4_schedule_stateid_recovery(mds_server, state) < 0) + goto out_bad_stateid; + } nfs4_schedule_lease_recovery(mds_client); goto wait_on_recovery; /* DS session errors */ @@ -227,6 +229,9 @@ static int filelayout_async_handle_error(struct rpc_task *task, out: task->tk_status = 0; return -EAGAIN; +out_bad_stateid: + task->tk_status = -EIO; + return 0; wait_on_recovery: rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL); if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0) @@ -300,6 +305,10 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) { struct nfs_read_data *rdata = 
data; + if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) { + rpc_exit(task, -EIO); + return; + } if (filelayout_reset_to_mds(rdata->header->lseg)) { dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); filelayout_reset_read(rdata); @@ -308,10 +317,13 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) } rdata->read_done_cb = filelayout_read_done_cb; - nfs41_setup_sequence(rdata->ds_clp->cl_session, + if (nfs41_setup_sequence(rdata->ds_clp->cl_session, &rdata->args.seq_args, &rdata->res.seq_res, - task); + task)) + return; + nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, + rdata->args.lock_context, FMODE_READ); } static void filelayout_read_call_done(struct rpc_task *task, void *data) @@ -402,16 +414,23 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data) { struct nfs_write_data *wdata = data; + if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) { + rpc_exit(task, -EIO); + return; + } if (filelayout_reset_to_mds(wdata->header->lseg)) { dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); filelayout_reset_write(wdata); rpc_exit(task, 0); return; } - nfs41_setup_sequence(wdata->ds_clp->cl_session, + if (nfs41_setup_sequence(wdata->ds_clp->cl_session, &wdata->args.seq_args, &wdata->res.seq_res, - task); + task)) + return; + nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, + wdata->args.lock_context, FMODE_WRITE); } static void filelayout_write_call_done(struct rpc_task *task, void *data) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b2671cb0f901..c13144911d20 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -107,6 +107,8 @@ static int nfs4_map_errors(int err) return -EPROTONOSUPPORT; case -NFS4ERR_ACCESS: return -EACCES; + case -NFS4ERR_FILE_OPEN: + return -EBUSY; default: dprintk("%s could not handle NFSv4 error %d\n", __func__, -err); @@ -295,19 +297,30 @@ static int nfs4_handle_exception(struct nfs_server 
*server, int errorcode, struc } if (state == NULL) break; - nfs4_schedule_stateid_recovery(server, state); + ret = nfs4_schedule_stateid_recovery(server, state); + if (ret < 0) + break; goto wait_on_recovery; case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: + if (inode != NULL && nfs4_have_delegation(inode, FMODE_READ)) { + nfs_remove_bad_delegation(inode); + exception->retry = 1; + break; + } if (state == NULL) break; - nfs_remove_bad_delegation(state->inode); - nfs4_schedule_stateid_recovery(server, state); + ret = nfs4_schedule_stateid_recovery(server, state); + if (ret < 0) + break; goto wait_on_recovery; case -NFS4ERR_EXPIRED: - if (state != NULL) - nfs4_schedule_stateid_recovery(server, state); + if (state != NULL) { + ret = nfs4_schedule_stateid_recovery(server, state); + if (ret < 0) + break; + } case -NFS4ERR_STALE_STATEID: case -NFS4ERR_STALE_CLIENTID: nfs4_schedule_lease_recovery(clp); @@ -760,6 +773,35 @@ struct nfs4_opendata { int cancelled; }; +static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server, + int err, struct nfs4_exception *exception) +{ + if (err != -EINVAL) + return false; + if (!(server->caps & NFS_CAP_ATOMIC_OPEN_V1)) + return false; + server->caps &= ~NFS_CAP_ATOMIC_OPEN_V1; + exception->retry = 1; + return true; +} + +static enum open_claim_type4 +nfs4_map_atomic_open_claim(struct nfs_server *server, + enum open_claim_type4 claim) +{ + if (server->caps & NFS_CAP_ATOMIC_OPEN_V1) + return claim; + switch (claim) { + default: + return claim; + case NFS4_OPEN_CLAIM_FH: + return NFS4_OPEN_CLAIM_NULL; + case NFS4_OPEN_CLAIM_DELEG_CUR_FH: + return NFS4_OPEN_CLAIM_DELEGATE_CUR; + case NFS4_OPEN_CLAIM_DELEG_PREV_FH: + return NFS4_OPEN_CLAIM_DELEGATE_PREV; + } +} static void nfs4_init_opendata_res(struct nfs4_opendata *p) { @@ -775,6 +817,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p) static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, struct nfs4_state_owner 
*sp, fmode_t fmode, int flags, const struct iattr *attrs, + enum open_claim_type4 claim, gfp_t gfp_mask) { struct dentry *parent = dget_parent(dentry); @@ -793,7 +836,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->dir = parent; p->owner = sp; atomic_inc(&sp->so_count); - p->o_arg.fh = NFS_FH(dir); p->o_arg.open_flags = flags; p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); /* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS @@ -811,7 +853,19 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.server = server; p->o_arg.bitmask = server->attr_bitmask; p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0]; - p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; + p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim); + switch (p->o_arg.claim) { + case NFS4_OPEN_CLAIM_NULL: + case NFS4_OPEN_CLAIM_DELEGATE_CUR: + case NFS4_OPEN_CLAIM_DELEGATE_PREV: + p->o_arg.fh = NFS_FH(dir); + break; + case NFS4_OPEN_CLAIM_PREVIOUS: + case NFS4_OPEN_CLAIM_FH: + case NFS4_OPEN_CLAIM_DELEG_CUR_FH: + case NFS4_OPEN_CLAIM_DELEG_PREV_FH: + p->o_arg.fh = NFS_FH(dentry->d_inode); + } if (attrs != NULL && attrs->ia_valid != 0) { __be32 verf[2]; @@ -924,6 +978,7 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid * if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) nfs4_stateid_copy(&state->stateid, stateid); nfs4_stateid_copy(&state->open_stateid, stateid); + set_bit(NFS_OPEN_STATE, &state->flags); switch (fmode) { case FMODE_READ: set_bit(NFS_O_RDONLY_STATE, &state->flags); @@ -1193,11 +1248,13 @@ static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state * return ERR_PTR(-ENOENT); } -static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context *ctx, struct nfs4_state *state) +static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context *ctx, + struct nfs4_state *state, enum open_claim_type4 claim) { struct nfs4_opendata *opendata; - 
opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0, NULL, GFP_NOFS); + opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0, + NULL, claim, GFP_NOFS); if (opendata == NULL) return ERR_PTR(-ENOMEM); opendata->state = state; @@ -1233,6 +1290,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * /* memory barrier prior to reading state->n_* */ clear_bit(NFS_DELEGATED_STATE, &state->flags); + clear_bit(NFS_OPEN_STATE, &state->flags); smp_rmb(); if (state->n_rdwr != 0) { clear_bit(NFS_O_RDWR_STATE, &state->flags); @@ -1283,11 +1341,10 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state fmode_t delegation_type = 0; int status; - opendata = nfs4_open_recoverdata_alloc(ctx, state); + opendata = nfs4_open_recoverdata_alloc(ctx, state, + NFS4_OPEN_CLAIM_PREVIOUS); if (IS_ERR(opendata)) return PTR_ERR(opendata); - opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS; - opendata->o_arg.fh = NFS_FH(state->inode); rcu_read_lock(); delegation = rcu_dereference(NFS_I(state->inode)->delegation); if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0) @@ -1306,6 +1363,8 @@ static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state int err; do { err = _nfs4_do_open_reclaim(ctx, state); + if (nfs4_clear_cap_atomic_open_v1(server, err, &exception)) + continue; if (err != -NFS4ERR_DELAY) break; nfs4_handle_exception(server, err, &exception); @@ -1320,71 +1379,72 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta ctx = nfs4_state_find_open_context(state); if (IS_ERR(ctx)) - return PTR_ERR(ctx); + return -EAGAIN; ret = nfs4_do_open_reclaim(ctx, state); put_nfs_open_context(ctx); return ret; } -static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) +static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct nfs4_state *state, const 
nfs4_stateid *stateid, int err) { - struct nfs4_opendata *opendata; - int ret; - - opendata = nfs4_open_recoverdata_alloc(ctx, state); - if (IS_ERR(opendata)) - return PTR_ERR(opendata); - opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR; - nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid); - ret = nfs4_open_recover(opendata, state); - nfs4_opendata_put(opendata); - return ret; + switch (err) { + default: + printk(KERN_ERR "NFS: %s: unhandled error " + "%d.\n", __func__, err); + case 0: + case -ENOENT: + case -ESTALE: + break; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_DEADSESSION: + set_bit(NFS_DELEGATED_STATE, &state->flags); + nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); + return -EAGAIN; + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + set_bit(NFS_DELEGATED_STATE, &state->flags); + case -NFS4ERR_EXPIRED: + /* Don't recall a delegation if it was lost */ + nfs4_schedule_lease_recovery(server->nfs_client); + return -EAGAIN; + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OPENMODE: + nfs_inode_find_state_and_recover(state->inode, + stateid); + nfs4_schedule_stateid_recovery(server, state); + return 0; + case -NFS4ERR_DELAY: + case -NFS4ERR_GRACE: + set_bit(NFS_DELEGATED_STATE, &state->flags); + ssleep(1); + return -EAGAIN; + case -ENOMEM: + case -NFS4ERR_DENIED: + /* kill_proc(fl->fl_pid, SIGLOST, 1); */ + return 0; + } + return err; } int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) { - struct nfs4_exception exception = { }; struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_opendata *opendata; int err; - do { - err = _nfs4_open_delegation_recall(ctx, state, stateid); - switch (err) { - case 0: - case -ENOENT: - case -ESTALE: - goto out; - case -NFS4ERR_BADSESSION: - case 
-NFS4ERR_BADSLOT: - case -NFS4ERR_BAD_HIGH_SLOT: - case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: - case -NFS4ERR_DEADSESSION: - set_bit(NFS_DELEGATED_STATE, &state->flags); - nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); - err = -EAGAIN; - goto out; - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_STALE_STATEID: - set_bit(NFS_DELEGATED_STATE, &state->flags); - case -NFS4ERR_EXPIRED: - /* Don't recall a delegation if it was lost */ - nfs4_schedule_lease_recovery(server->nfs_client); - err = -EAGAIN; - goto out; - case -NFS4ERR_DELEG_REVOKED: - case -NFS4ERR_ADMIN_REVOKED: - case -NFS4ERR_BAD_STATEID: - nfs_inode_find_state_and_recover(state->inode, - stateid); - nfs4_schedule_stateid_recovery(server, state); - case -ENOMEM: - err = 0; - goto out; - } - set_bit(NFS_DELEGATED_STATE, &state->flags); - err = nfs4_handle_exception(server, err, &exception); - } while (exception.retry); -out: - return err; + + opendata = nfs4_open_recoverdata_alloc(ctx, state, + NFS4_OPEN_CLAIM_DELEG_CUR_FH); + if (IS_ERR(opendata)) + return PTR_ERR(opendata); + nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid); + err = nfs4_open_recover(opendata, state); + nfs4_opendata_put(opendata); + return nfs4_handle_delegation_recall_error(server, state, stateid, err); } static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) @@ -1467,6 +1527,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) { struct nfs4_opendata *data = calldata; struct nfs4_state_owner *sp = data->owner; + struct nfs_client *clp = sp->so_server->nfs_client; if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0) goto out_wait; @@ -1487,7 +1548,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) rcu_read_unlock(); } /* Update client id. 
*/ - data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid; + data->o_arg.clientid = clp->cl_clientid; if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; data->o_arg.open_bitmap = &nfs4_open_noattr_bitmap[0]; @@ -1499,6 +1560,16 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) &data->o_res.seq_res, task) != 0) nfs_release_seqid(data->o_arg.seqid); + + /* Set the create mode (note dependency on the session type) */ + data->o_arg.createmode = NFS4_CREATE_UNCHECKED; + if (data->o_arg.open_flags & O_EXCL) { + data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE; + if (nfs4_has_persistent_session(clp)) + data->o_arg.createmode = NFS4_CREATE_GUARDED; + else if (clp->cl_mvops->minor_version > 0) + data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE4_1; + } return; unlock_no_action: rcu_read_unlock(); @@ -1720,7 +1791,8 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s struct nfs4_opendata *opendata; int ret; - opendata = nfs4_open_recoverdata_alloc(ctx, state); + opendata = nfs4_open_recoverdata_alloc(ctx, state, + NFS4_OPEN_CLAIM_FH); if (IS_ERR(opendata)) return PTR_ERR(opendata); ret = nfs4_open_recover(opendata, state); @@ -1738,6 +1810,8 @@ static int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state do { err = _nfs4_open_expired(ctx, state); + if (nfs4_clear_cap_atomic_open_v1(server, err, &exception)) + continue; switch (err) { default: goto out; @@ -1758,7 +1832,7 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta ctx = nfs4_state_find_open_context(state); if (IS_ERR(ctx)) - return PTR_ERR(ctx); + return -EAGAIN; ret = nfs4_do_open_expired(ctx, state); put_nfs_open_context(ctx); return ret; @@ -1820,6 +1894,7 @@ static int nfs41_check_open_stateid(struct nfs4_state *state) clear_bit(NFS_O_RDONLY_STATE, &state->flags); clear_bit(NFS_O_WRONLY_STATE, &state->flags); clear_bit(NFS_O_RDWR_STATE, 
&state->flags); + clear_bit(NFS_OPEN_STATE, &state->flags); } return status; } @@ -1880,10 +1955,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, if (ret != 0) goto out; - if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) { + if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) nfs4_schedule_stateid_recovery(server, state); - nfs4_wait_clnt_recover(server->nfs_client); - } *res = state; out: return ret; @@ -1905,6 +1978,7 @@ static int _nfs4_do_open(struct inode *dir, struct nfs4_state *state = NULL; struct nfs_server *server = NFS_SERVER(dir); struct nfs4_opendata *opendata; + enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL; int status; /* Protect against reboot recovery conflicts */ @@ -1920,7 +1994,10 @@ static int _nfs4_do_open(struct inode *dir, if (dentry->d_inode != NULL) nfs4_return_incompatible_delegation(dentry->d_inode, fmode); status = -ENOMEM; - opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, GFP_KERNEL); + if (dentry->d_inode) + claim = NFS4_OPEN_CLAIM_FH; + opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, + claim, GFP_KERNEL); if (opendata == NULL) goto err_put_state_owner; @@ -1937,7 +2014,8 @@ static int _nfs4_do_open(struct inode *dir, if (status != 0) goto err_opendata_put; - if (opendata->o_arg.open_flags & O_EXCL) { + if ((opendata->o_arg.open_flags & O_EXCL) && + (opendata->o_arg.createmode != NFS4_CREATE_GUARDED)) { nfs4_exclusive_attrset(opendata, sattr); nfs_fattr_init(opendata->o_res.f_attr); @@ -1978,6 +2056,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct rpc_cred *cred, struct nfs4_threshold **ctx_th) { + struct nfs_server *server = NFS_SERVER(dir); struct nfs4_exception exception = { }; struct nfs4_state *res; int status; @@ -2021,7 +2100,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, exception.retry = 1; continue; } - res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), + if (nfs4_clear_cap_atomic_open_v1(server, status, 
&exception)) + continue; + res = ERR_PTR(nfs4_handle_exception(server, status, &exception)); } while (exception.retry); return res; @@ -2053,7 +2134,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, nfs_fattr_init(fattr); - if (state != NULL) { + if (state != NULL && nfs4_valid_open_stateid(state)) { struct nfs_lockowner lockowner = { .l_owner = current->files, .l_pid = current->tgid, @@ -2129,11 +2210,19 @@ static void nfs4_close_clear_stateid_flags(struct nfs4_state *state, fmode_t fmode) { spin_lock(&state->owner->so_lock); - if (!(fmode & FMODE_READ)) - clear_bit(NFS_O_RDONLY_STATE, &state->flags); - if (!(fmode & FMODE_WRITE)) - clear_bit(NFS_O_WRONLY_STATE, &state->flags); clear_bit(NFS_O_RDWR_STATE, &state->flags); + switch (fmode & (FMODE_READ|FMODE_WRITE)) { + case FMODE_WRITE: + clear_bit(NFS_O_RDONLY_STATE, &state->flags); + break; + case FMODE_READ: + clear_bit(NFS_O_WRONLY_STATE, &state->flags); + break; + case 0: + clear_bit(NFS_O_RDONLY_STATE, &state->flags); + clear_bit(NFS_O_WRONLY_STATE, &state->flags); + clear_bit(NFS_OPEN_STATE, &state->flags); + } spin_unlock(&state->owner->so_lock); } @@ -2201,6 +2290,8 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) calldata->arg.fmode &= ~FMODE_WRITE; } } + if (!nfs4_valid_open_stateid(state)) + call_close = 0; spin_unlock(&state->owner->so_lock); if (!call_close) { @@ -2211,8 +2302,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) if (calldata->arg.fmode == 0) { task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; if (calldata->roc && - pnfs_roc_drain(inode, &calldata->roc_barrier, task)) + pnfs_roc_drain(inode, &calldata->roc_barrier, task)) { + nfs_release_seqid(calldata->arg.seqid); goto out_wait; + } } nfs_fattr_init(calldata->res.fattr); @@ -2632,7 +2725,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, int status; if (pnfs_ld_layoutret_on_setattr(inode)) - pnfs_return_layout(inode); + 
pnfs_commit_and_return_layout(inode); nfs_fattr_init(fattr); @@ -3445,6 +3538,46 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return err; } +int nfs4_set_rw_stateid(nfs4_stateid *stateid, + const struct nfs_open_context *ctx, + const struct nfs_lock_context *l_ctx, + fmode_t fmode) +{ + const struct nfs_lockowner *lockowner = NULL; + + if (l_ctx != NULL) + lockowner = &l_ctx->lockowner; + return nfs4_select_rw_stateid(stateid, ctx->state, fmode, lockowner); +} +EXPORT_SYMBOL_GPL(nfs4_set_rw_stateid); + +static bool nfs4_stateid_is_current(nfs4_stateid *stateid, + const struct nfs_open_context *ctx, + const struct nfs_lock_context *l_ctx, + fmode_t fmode) +{ + nfs4_stateid current_stateid; + + if (nfs4_set_rw_stateid(&current_stateid, ctx, l_ctx, fmode)) + return false; + return nfs4_stateid_match(stateid, &current_stateid); +} + +static bool nfs4_error_stateid_expired(int err) +{ + switch (err) { + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_OLD_STATEID: + case -NFS4ERR_OPENMODE: + case -NFS4ERR_EXPIRED: + return true; + } + return false; +} + void __nfs4_read_done_cb(struct nfs_read_data *data) { nfs_invalidate_atime(data->header->inode); @@ -3465,6 +3598,20 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) return 0; } +static bool nfs4_read_stateid_changed(struct rpc_task *task, + struct nfs_readargs *args) +{ + + if (!nfs4_error_stateid_expired(task->tk_status) || + nfs4_stateid_is_current(&args->stateid, + args->context, + args->lock_context, + FMODE_READ)) + return false; + rpc_restart_call_prepare(task); + return true; +} + static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) { @@ -3472,7 +3619,8 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) if (!nfs4_sequence_done(task, &data->res.seq_res)) return -EAGAIN; - + if (nfs4_read_stateid_changed(task,
&data->args)) + return -EAGAIN; return data->read_done_cb ? data->read_done_cb(task, data) : nfs4_read_done_cb(task, data); } @@ -3487,10 +3635,13 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) { - nfs4_setup_sequence(NFS_SERVER(data->header->inode), + if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), &data->args.seq_args, &data->res.seq_res, - task); + task)) + return; + nfs4_set_rw_stateid(&data->args.stateid, data->args.context, + data->args.lock_context, FMODE_READ); } static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) @@ -3508,10 +3659,26 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data return 0; } +static bool nfs4_write_stateid_changed(struct rpc_task *task, + struct nfs_writeargs *args) +{ + + if (!nfs4_error_stateid_expired(task->tk_status) || + nfs4_stateid_is_current(&args->stateid, + args->context, + args->lock_context, + FMODE_WRITE)) + return false; + rpc_restart_call_prepare(task); + return true; +} + static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) { if (!nfs4_sequence_done(task, &data->res.seq_res)) return -EAGAIN; + if (nfs4_write_stateid_changed(task, &data->args)) + return -EAGAIN; return data->write_done_cb ? 
data->write_done_cb(task, data) : nfs4_write_done_cb(task, data); } @@ -3551,10 +3718,13 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) { - nfs4_setup_sequence(NFS_SERVER(data->header->inode), + if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), &data->args.seq_args, &data->res.seq_res, - task); + task)) + return; + nfs4_set_rw_stateid(&data->args.stateid, data->args.context, + data->args.lock_context, FMODE_WRITE); } static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) @@ -3656,7 +3826,7 @@ static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred, return -ENOMEM; data->client = clp; data->timestamp = jiffies; - return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT, + return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT, &nfs4_renew_ops, data); } @@ -3670,7 +3840,7 @@ static int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) unsigned long now = jiffies; int status; - status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); + status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (status < 0) return status; do_renew_lease(clp, now); @@ -3980,11 +4150,14 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, case -NFS4ERR_OPENMODE: if (state == NULL) break; - nfs4_schedule_stateid_recovery(server, state); + if (nfs4_schedule_stateid_recovery(server, state) < 0) + goto stateid_invalid; goto wait_on_recovery; case -NFS4ERR_EXPIRED: - if (state != NULL) - nfs4_schedule_stateid_recovery(server, state); + if (state != NULL) { + if (nfs4_schedule_stateid_recovery(server, state) < 0) + goto stateid_invalid; + } case -NFS4ERR_STALE_STATEID: case -NFS4ERR_STALE_CLIENTID: nfs4_schedule_lease_recovery(clp); @@ -4016,6 +4189,9 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, } 
task->tk_status = nfs4_map_errors(task->tk_status); return 0; +stateid_invalid: + task->tk_status = -EIO; + return 0; wait_on_recovery: rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) @@ -4627,17 +4803,23 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) { goto out_release_lock_seqid; } - data->arg.open_stateid = &state->stateid; + data->arg.open_stateid = &state->open_stateid; data->arg.new_lock_owner = 1; data->res.open_seqid = data->arg.open_seqid; } else data->arg.new_lock_owner = 0; + if (!nfs4_valid_open_stateid(state)) { + data->rpc_status = -EBADF; + task->tk_action = NULL; + goto out_release_open_seqid; + } data->timestamp = jiffies; if (nfs4_setup_sequence(data->server, &data->arg.seq_args, &data->res.seq_res, task) == 0) return; +out_release_open_seqid: nfs_release_seqid(data->arg.open_seqid); out_release_lock_seqid: nfs_release_seqid(data->arg.lock_seqid); @@ -4983,58 +5165,16 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) return status; } -int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) +int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid) { struct nfs_server *server = NFS_SERVER(state->inode); - struct nfs4_exception exception = { }; int err; err = nfs4_set_lock_state(state, fl); if (err != 0) - goto out; - do { - err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); - switch (err) { - default: - printk(KERN_ERR "NFS: %s: unhandled error " - "%d.\n", __func__, err); - case 0: - case -ESTALE: - goto out; - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_STALE_STATEID: - set_bit(NFS_DELEGATED_STATE, &state->flags); - case -NFS4ERR_EXPIRED: - nfs4_schedule_lease_recovery(server->nfs_client); - err = -EAGAIN; - goto out; - case -NFS4ERR_BADSESSION: - case -NFS4ERR_BADSLOT: - case -NFS4ERR_BAD_HIGH_SLOT: - 
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: - case -NFS4ERR_DEADSESSION: - set_bit(NFS_DELEGATED_STATE, &state->flags); - nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); - err = -EAGAIN; - goto out; - case -NFS4ERR_DELEG_REVOKED: - case -NFS4ERR_ADMIN_REVOKED: - case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_OPENMODE: - nfs4_schedule_stateid_recovery(server, state); - err = 0; - goto out; - case -ENOMEM: - case -NFS4ERR_DENIED: - /* kill_proc(fl->fl_pid, SIGLOST, 1); */ - err = 0; - goto out; - } - set_bit(NFS_DELEGATED_STATE, &state->flags); - err = nfs4_handle_exception(server, err, &exception); - } while (exception.retry); -out: - return err; + return err; + err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); + return nfs4_handle_delegation_recall_error(server, state, stateid, err); } struct nfs_release_lockowner_data { @@ -5848,7 +5988,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, .rpc_client = clp->cl_rpcclient, .rpc_message = &msg, .callback_ops = &nfs41_sequence_ops, - .flags = RPC_TASK_ASYNC | RPC_TASK_SOFT, + .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT, }; if (!atomic_inc_not_zero(&clp->cl_count)) @@ -6416,22 +6556,8 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata) static void nfs4_layoutcommit_release(void *calldata) { struct nfs4_layoutcommit_data *data = calldata; - struct pnfs_layout_segment *lseg, *tmp; - unsigned long *bitlock = &NFS_I(data->args.inode)->flags; pnfs_cleanup_layoutcommit(data); - /* Matched by references in pnfs_set_layoutcommit */ - list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) { - list_del_init(&lseg->pls_lc_list); - if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, - &lseg->pls_flags)) - pnfs_put_lseg(lseg); - } - - clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); - smp_mb__after_clear_bit(); - wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); - put_rpccred(data->cred); kfree(data); } @@ -6739,6 +6865,10 @@ static const struct nfs4_state_maintenance_ops 
nfs41_state_renewal_ops = { static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .minor_version = 0, + .init_caps = NFS_CAP_READDIRPLUS + | NFS_CAP_ATOMIC_OPEN + | NFS_CAP_CHANGE_ATTR + | NFS_CAP_POSIX_LOCK, .call_sync = _nfs4_call_sync, .match_stateid = nfs4_match_stateid, .find_root_sec = nfs4_find_root_sec, @@ -6750,6 +6880,12 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { #if defined(CONFIG_NFS_V4_1) static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .minor_version = 1, + .init_caps = NFS_CAP_READDIRPLUS + | NFS_CAP_ATOMIC_OPEN + | NFS_CAP_CHANGE_ATTR + | NFS_CAP_POSIX_LOCK + | NFS_CAP_STATEID_NFSV41 + | NFS_CAP_ATOMIC_OPEN_V1, .call_sync = nfs4_call_sync_sequence, .match_stateid = nfs41_match_stateid, .find_root_sec = nfs41_find_root_sec, diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 6ace365c6334..b7796950eceb 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -699,6 +699,8 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner) list_for_each_entry(state, &nfsi->open_states, inode_states) { if (state->owner != owner) continue; + if (!nfs4_valid_open_stateid(state)) + continue; if (atomic_inc_not_zero(&state->count)) return state; } @@ -987,13 +989,14 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) return 0; } -static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state, +static int nfs4_copy_lock_stateid(nfs4_stateid *dst, + struct nfs4_state *state, const struct nfs_lockowner *lockowner) { struct nfs4_lock_state *lsp; fl_owner_t fl_owner; pid_t fl_pid; - bool ret = false; + int ret = -ENOENT; if (lockowner == NULL) @@ -1008,7 +1011,10 @@ static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state, lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { nfs4_stateid_copy(dst, &lsp->ls_stateid); - ret = true; 
+ ret = 0; + smp_rmb(); + if (!list_empty(&lsp->ls_seqid.list)) + ret = -EWOULDBLOCK; } spin_unlock(&state->state_lock); nfs4_put_lock_state(lsp); @@ -1016,28 +1022,44 @@ static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state, return ret; } -static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) +static int nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) { + const nfs4_stateid *src; + int ret; int seq; do { + src = &zero_stateid; seq = read_seqbegin(&state->seqlock); - nfs4_stateid_copy(dst, &state->stateid); + if (test_bit(NFS_OPEN_STATE, &state->flags)) + src = &state->open_stateid; + nfs4_stateid_copy(dst, src); + ret = 0; + smp_rmb(); + if (!list_empty(&state->owner->so_seqid.list)) + ret = -EWOULDBLOCK; } while (read_seqretry(&state->seqlock, seq)); + return ret; } /* * Byte-range lock aware utility to initialize the stateid of read/write * requests. */ -void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, +int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, fmode_t fmode, const struct nfs_lockowner *lockowner) { + int ret = 0; if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) - return; - if (nfs4_copy_lock_stateid(dst, state, lockowner)) - return; - nfs4_copy_open_stateid(dst, state); + goto out; + ret = nfs4_copy_lock_stateid(dst, state, lockowner); + if (ret != -ENOENT) + goto out; + ret = nfs4_copy_open_stateid(dst, state); +out: + if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41)) + dst->seqid = 0; + return ret; } struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask) @@ -1286,14 +1308,17 @@ static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_s return 1; } -void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state) +int nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state) { struct nfs_client *clp = 
server->nfs_client; + if (!nfs4_valid_open_stateid(state)) + return -EBADF; nfs4_state_mark_reclaim_nograce(clp, state); dprintk("%s: scheduling stateid recovery for server %s\n", __func__, clp->cl_hostname); nfs4_schedule_state_manager(clp); + return 0; } EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery); @@ -1323,6 +1348,27 @@ void nfs_inode_find_state_and_recover(struct inode *inode, nfs4_schedule_state_manager(clp); } +static void nfs4_state_mark_open_context_bad(struct nfs4_state *state) +{ + struct inode *inode = state->inode; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_open_context *ctx; + + spin_lock(&inode->i_lock); + list_for_each_entry(ctx, &nfsi->open_files, list) { + if (ctx->state != state) + continue; + set_bit(NFS_CONTEXT_BAD, &ctx->flags); + } + spin_unlock(&inode->i_lock); +} + +static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error) +{ + set_bit(NFS_STATE_RECOVERY_FAILED, &state->flags); + nfs4_state_mark_open_context_bad(state); +} + static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) { @@ -1398,6 +1444,8 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs list_for_each_entry(state, &sp->so_states, open_states) { if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) continue; + if (!nfs4_valid_open_stateid(state)) + continue; if (state->state == 0) continue; atomic_inc(&state->count); @@ -1430,11 +1478,10 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs * Open state on this file cannot be recovered * All we can do is revert to using the zero stateid. 
*/ - memset(&state->stateid, 0, - sizeof(state->stateid)); - /* Mark the file as being 'closed' */ - state->state = 0; + nfs4_state_mark_recovery_failed(state, status); break; + case -EAGAIN: + ssleep(1); case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_BAD_STATEID: @@ -1696,6 +1743,10 @@ static int nfs4_check_lease(struct nfs_client *clp) } status = ops->renew_lease(clp, cred); put_rpccred(cred); + if (status == -ETIMEDOUT) { + set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); + return 0; + } out: return nfs4_recovery_handle_error(clp, status); } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e3edda554ac7..c2cbf0d90a31 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1058,8 +1058,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const if (iap->ia_valid & ATTR_ATIME_SET) { bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); - *p++ = cpu_to_be32(0); - *p++ = cpu_to_be32(iap->ia_atime.tv_sec); + p = xdr_encode_hyper(p, (s64)iap->ia_atime.tv_sec); *p++ = cpu_to_be32(iap->ia_atime.tv_nsec); } else if (iap->ia_valid & ATTR_ATIME) { @@ -1069,8 +1068,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const if (iap->ia_valid & ATTR_MTIME_SET) { bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); - *p++ = cpu_to_be32(0); - *p++ = cpu_to_be32(iap->ia_mtime.tv_sec); + p = xdr_encode_hyper(p, (s64)iap->ia_mtime.tv_sec); *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec); } else if (iap->ia_valid & ATTR_MTIME) { @@ -1366,33 +1364,28 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) { + struct iattr dummy; __be32 *p; - struct nfs_client *clp; p = reserve_space(xdr, 4); - switch(arg->open_flags & O_EXCL) { - case 0: + switch(arg->createmode) { + case NFS4_CREATE_UNCHECKED: *p = 
cpu_to_be32(NFS4_CREATE_UNCHECKED); encode_attrs(xdr, arg->u.attrs, arg->server); break; - default: - clp = arg->server->nfs_client; - if (clp->cl_mvops->minor_version > 0) { - if (nfs4_has_persistent_session(clp)) { - *p = cpu_to_be32(NFS4_CREATE_GUARDED); - encode_attrs(xdr, arg->u.attrs, arg->server); - } else { - struct iattr dummy; - - *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1); - encode_nfs4_verifier(xdr, &arg->u.verifier); - dummy.ia_valid = 0; - encode_attrs(xdr, &dummy, arg->server); - } - } else { - *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE); - encode_nfs4_verifier(xdr, &arg->u.verifier); - } + case NFS4_CREATE_GUARDED: + *p = cpu_to_be32(NFS4_CREATE_GUARDED); + encode_attrs(xdr, arg->u.attrs, arg->server); + break; + case NFS4_CREATE_EXCLUSIVE: + *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE); + encode_nfs4_verifier(xdr, &arg->u.verifier); + break; + case NFS4_CREATE_EXCLUSIVE4_1: + *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1); + encode_nfs4_verifier(xdr, &arg->u.verifier); + dummy.ia_valid = 0; + encode_attrs(xdr, &dummy, arg->server); } } @@ -1459,6 +1452,23 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc encode_string(xdr, name->len, name->name); } +static inline void encode_claim_fh(struct xdr_stream *xdr) +{ + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(NFS4_OPEN_CLAIM_FH); +} + +static inline void encode_claim_delegate_cur_fh(struct xdr_stream *xdr, const nfs4_stateid *stateid) +{ + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(NFS4_OPEN_CLAIM_DELEG_CUR_FH); + encode_nfs4_stateid(xdr, stateid); +} + static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, struct compound_hdr *hdr) { encode_op_hdr(xdr, OP_OPEN, decode_open_maxsz, hdr); @@ -1474,6 +1484,12 @@ static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, case NFS4_OPEN_CLAIM_DELEGATE_CUR: encode_claim_delegate_cur(xdr, arg->name, &arg->u.delegation); break; + case NFS4_OPEN_CLAIM_FH: + 
encode_claim_fh(xdr); + break; + case NFS4_OPEN_CLAIM_DELEG_CUR_FH: + encode_claim_delegate_cur_fh(xdr, &arg->u.delegation); + break; default: BUG(); } @@ -1506,35 +1522,12 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr); } -static void encode_open_stateid(struct xdr_stream *xdr, - const struct nfs_open_context *ctx, - const struct nfs_lock_context *l_ctx, - fmode_t fmode, - int zero_seqid) -{ - nfs4_stateid stateid; - - if (ctx->state != NULL) { - const struct nfs_lockowner *lockowner = NULL; - - if (l_ctx != NULL) - lockowner = &l_ctx->lockowner; - nfs4_select_rw_stateid(&stateid, ctx->state, - fmode, lockowner); - if (zero_seqid) - stateid.seqid = 0; - encode_nfs4_stateid(xdr, &stateid); - } else - encode_nfs4_stateid(xdr, &zero_stateid); -} - static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) { __be32 *p; encode_op_hdr(xdr, OP_READ, decode_read_maxsz, hdr); - encode_open_stateid(xdr, args->context, args->lock_context, - FMODE_READ, hdr->minorversion); + encode_nfs4_stateid(xdr, &args->stateid); p = reserve_space(xdr, 12); p = xdr_encode_hyper(p, args->offset); @@ -1670,8 +1663,7 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg __be32 *p; encode_op_hdr(xdr, OP_WRITE, decode_write_maxsz, hdr); - encode_open_stateid(xdr, args->context, args->lock_context, - FMODE_WRITE, hdr->minorversion); + encode_nfs4_stateid(xdr, &args->stateid); p = reserve_space(xdr, 16); p = xdr_encode_hyper(p, args->offset); @@ -3497,8 +3489,11 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) if (n == 0) goto root_path; dprintk("pathname4: "); - path->ncomponents = 0; - while (path->ncomponents < n) { + if (n > NFS4_PATHNAME_MAXCOMPONENTS) { + dprintk("cannot parse %d components in path\n", n); + goto out_eio; + } + for (path->ncomponents = 0; path->ncomponents < n; 
path->ncomponents++) { struct nfs4_string *component = &path->components[path->ncomponents]; status = decode_opaque_inline(xdr, &component->len, &component->data); if (unlikely(status != 0)) @@ -3507,12 +3502,6 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) pr_cont("%s%.*s ", (path->ncomponents != n ? "/ " : ""), component->len, component->data); - if (path->ncomponents < NFS4_PATHNAME_MAXCOMPONENTS) - path->ncomponents++; - else { - dprintk("cannot parse %d components in path\n", n); - goto out_eio; - } } out: return status; @@ -3557,27 +3546,23 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st n = be32_to_cpup(p); if (n <= 0) goto out_eio; - res->nlocations = 0; - while (res->nlocations < n) { + for (res->nlocations = 0; res->nlocations < n; res->nlocations++) { u32 m; - struct nfs4_fs_location *loc = &res->locations[res->nlocations]; + struct nfs4_fs_location *loc; + if (res->nlocations == NFS4_FS_LOCATIONS_MAXENTRIES) + break; + loc = &res->locations[res->nlocations]; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) goto out_overflow; m = be32_to_cpup(p); - loc->nservers = 0; dprintk("%s: servers:\n", __func__); - while (loc->nservers < m) { - struct nfs4_string *server = &loc->servers[loc->nservers]; - status = decode_opaque_inline(xdr, &server->len, &server->data); - if (unlikely(status != 0)) - goto out_eio; - dprintk("%s ", server->data); - if (loc->nservers < NFS4_FS_LOCATION_MAXSERVERS) - loc->nservers++; - else { + for (loc->nservers = 0; loc->nservers < m; loc->nservers++) { + struct nfs4_string *server; + + if (loc->nservers == NFS4_FS_LOCATION_MAXSERVERS) { unsigned int i; dprintk("%s: using first %u of %u servers " "returned for location %u\n", @@ -3591,13 +3576,17 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st if (unlikely(status != 0)) goto out_eio; } + break; } + server = &loc->servers[loc->nservers]; + status = decode_opaque_inline(xdr, 
&server->len, &server->data); + if (unlikely(status != 0)) + goto out_eio; + dprintk("%s ", server->data); } status = decode_pathname(xdr, &loc->rootpath); if (unlikely(status != 0)) goto out_eio; - if (res->nlocations < NFS4_FS_LOCATIONS_MAXENTRIES) - res->nlocations++; } if (res->nlocations != 0) status = NFS_ATTR_FATTR_V4_LOCATIONS; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index e56e846e9d2d..29cfb7ade121 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -84,6 +84,55 @@ nfs_page_free(struct nfs_page *p) kmem_cache_free(nfs_page_cachep, p); } +static void +nfs_iocounter_inc(struct nfs_io_counter *c) +{ + atomic_inc(&c->io_count); +} + +static void +nfs_iocounter_dec(struct nfs_io_counter *c) +{ + if (atomic_dec_and_test(&c->io_count)) { + clear_bit(NFS_IO_INPROGRESS, &c->flags); + smp_mb__after_clear_bit(); + wake_up_bit(&c->flags, NFS_IO_INPROGRESS); + } +} + +static int +__nfs_iocounter_wait(struct nfs_io_counter *c) +{ + wait_queue_head_t *wq = bit_waitqueue(&c->flags, NFS_IO_INPROGRESS); + DEFINE_WAIT_BIT(q, &c->flags, NFS_IO_INPROGRESS); + int ret = 0; + + do { + prepare_to_wait(wq, &q.wait, TASK_KILLABLE); + set_bit(NFS_IO_INPROGRESS, &c->flags); + if (atomic_read(&c->io_count) == 0) + break; + ret = nfs_wait_bit_killable(&c->flags); + } while (atomic_read(&c->io_count) != 0); + finish_wait(wq, &q.wait); + return ret; +} + +/** + * nfs_iocounter_wait - wait for i/o to complete + * @c: nfs_io_counter to use + * + * returns -ERESTARTSYS if interrupted by a fatal signal. + * Otherwise returns 0 once the io_count hits 0. + */ +int +nfs_iocounter_wait(struct nfs_io_counter *c) +{ + if (atomic_read(&c->io_count) == 0) + return 0; + return __nfs_iocounter_wait(c); +} + /** * nfs_create_request - Create an NFS read/write request. 
* @ctx: open context to use @@ -104,6 +153,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, struct nfs_page *req; struct nfs_lock_context *l_ctx; + if (test_bit(NFS_CONTEXT_BAD, &ctx->flags)) + return ERR_PTR(-EBADF); /* try to allocate the request struct */ req = nfs_page_alloc(); if (req == NULL) @@ -116,6 +167,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, return ERR_CAST(l_ctx); } req->wb_lock_context = l_ctx; + nfs_iocounter_inc(&l_ctx->io_count); /* Initialize the request struct. Initially, we assume a * long write-back delay. This will be adjusted in @@ -175,6 +227,7 @@ static void nfs_clear_request(struct nfs_page *req) req->wb_page = NULL; } if (l_ctx != NULL) { + nfs_iocounter_dec(&l_ctx->io_count); nfs_put_lock_context(l_ctx); req->wb_lock_context = NULL; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 48ac5aad6258..c5bd758e5637 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -417,6 +417,16 @@ should_free_lseg(struct pnfs_layout_range *lseg_range, lo_seg_intersecting(lseg_range, recall_range); } +static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, + struct list_head *tmp_list) +{ + if (!atomic_dec_and_test(&lseg->pls_refcount)) + return false; + pnfs_layout_remove_lseg(lseg->pls_layout, lseg); + list_add(&lseg->pls_list, tmp_list); + return true; +} + /* Returns 1 if lseg is removed from list, 0 otherwise */ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, struct list_head *tmp_list) @@ -430,11 +440,8 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, */ dprintk("%s: lseg %p ref %d\n", __func__, lseg, atomic_read(&lseg->pls_refcount)); - if (atomic_dec_and_test(&lseg->pls_refcount)) { - pnfs_layout_remove_lseg(lseg->pls_layout, lseg); - list_add(&lseg->pls_list, tmp_list); + if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list)) rv = 1; - } } return rv; } @@ -711,6 +718,8 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr 
*lo, spin_lock(&lo->plh_inode->i_lock); if (pnfs_layoutgets_blocked(lo, 1)) { status = -EAGAIN; + } else if (!nfs4_valid_open_stateid(open_state)) { + status = -EBADF; } else if (list_empty(&lo->plh_segs)) { int seq; @@ -777,6 +786,21 @@ send_layoutget(struct pnfs_layout_hdr *lo, return lseg; } +static void pnfs_clear_layoutcommit(struct inode *inode, + struct list_head *head) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct pnfs_layout_segment *lseg, *tmp; + + if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) + return; + list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) { + if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) + continue; + pnfs_lseg_dec_and_remove_zero(lseg, head); + } +} + /* * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr * when the layout segment list is empty. @@ -808,6 +832,7 @@ _pnfs_return_layout(struct inode *ino) /* Reference matched in nfs4_layoutreturn_release */ pnfs_get_layout_hdr(lo); empty = list_empty(&lo->plh_segs); + pnfs_clear_layoutcommit(ino, &tmp_list); pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); /* Don't send a LAYOUTRETURN if list was initially empty */ if (empty) { @@ -820,8 +845,6 @@ _pnfs_return_layout(struct inode *ino) spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); - WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)); - lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); if (unlikely(lrp == NULL)) { status = -ENOMEM; @@ -845,6 +868,33 @@ _pnfs_return_layout(struct inode *ino) } EXPORT_SYMBOL_GPL(_pnfs_return_layout); +int +pnfs_commit_and_return_layout(struct inode *inode) +{ + struct pnfs_layout_hdr *lo; + int ret; + + spin_lock(&inode->i_lock); + lo = NFS_I(inode)->layout; + if (lo == NULL) { + spin_unlock(&inode->i_lock); + return 0; + } + pnfs_get_layout_hdr(lo); + /* Block new layoutgets and read/write to ds */ + lo->plh_block_lgets++; + spin_unlock(&inode->i_lock); + filemap_fdatawait(inode->i_mapping); + ret = 
pnfs_layoutcommit_inode(inode, true); + if (ret == 0) + ret = _pnfs_return_layout(inode); + spin_lock(&inode->i_lock); + lo->plh_block_lgets--; + spin_unlock(&inode->i_lock); + pnfs_put_layout_hdr(lo); + return ret; +} + bool pnfs_roc(struct inode *ino) { struct pnfs_layout_hdr *lo; @@ -1458,7 +1508,6 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data) dprintk("pnfs write error = %d\n", hdr->pnfs_error); if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & PNFS_LAYOUTRET_ON_ERROR) { - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); pnfs_return_layout(hdr->inode); } if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) @@ -1613,7 +1662,6 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data) dprintk("pnfs read error = %d\n", hdr->pnfs_error); if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & PNFS_LAYOUTRET_ON_ERROR) { - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); pnfs_return_layout(hdr->inode); } if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) @@ -1746,11 +1794,27 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp) list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { if (lseg->pls_range.iomode == IOMODE_RW && - test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) + test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) list_add(&lseg->pls_lc_list, listp); } } +static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp) +{ + struct pnfs_layout_segment *lseg, *tmp; + unsigned long *bitlock = &NFS_I(inode)->flags; + + /* Matched by references in pnfs_set_layoutcommit */ + list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) { + list_del_init(&lseg->pls_lc_list); + pnfs_put_lseg(lseg); + } + + clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); + smp_mb__after_clear_bit(); + wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); +} + void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) { pnfs_layout_io_set_failed(lseg->pls_layout, 
lseg->pls_range.iomode); @@ -1795,6 +1859,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) if (nfss->pnfs_curr_ld->cleanup_layoutcommit) nfss->pnfs_curr_ld->cleanup_layoutcommit(data); + pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list); } /* diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 94ba80417748..f5f8a470a647 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -219,6 +219,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); int _pnfs_return_layout(struct inode *); +int pnfs_commit_and_return_layout(struct inode *); void pnfs_ld_write_done(struct nfs_write_data *); void pnfs_ld_read_done(struct nfs_read_data *); struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, @@ -407,6 +408,11 @@ static inline int pnfs_return_layout(struct inode *ino) return 0; } +static inline int pnfs_commit_and_return_layout(struct inode *inode) +{ + return 0; +} + static inline bool pnfs_ld_layoutret_on_setattr(struct inode *inode) { diff --git a/fs/nfs/read.c b/fs/nfs/read.c index a5e5d9899d56..70a26c651f09 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -514,6 +514,8 @@ void nfs_read_prepare(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data); + if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) + rpc_exit(task, -EIO); } static const struct rpc_call_ops nfs_read_common_ops = { diff --git a/fs/nfs/write.c b/fs/nfs/write.c index c483cc50b82e..a2c7c28049d5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1251,6 +1251,8 @@ void nfs_write_prepare(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data); + if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) + rpc_exit(task, -EIO); } void 
nfs_commit_prepare(struct rpc_task *task, void *calldata) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 1cc25682b20b..fc01d5cb4cf1 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -59,11 +59,18 @@ struct nfs_lockowner { pid_t l_pid; }; +#define NFS_IO_INPROGRESS 0 +struct nfs_io_counter { + unsigned long flags; + atomic_t io_count; +}; + struct nfs_lock_context { atomic_t count; struct list_head list; struct nfs_open_context *open_context; struct nfs_lockowner lockowner; + struct nfs_io_counter io_count; }; struct nfs4_state; @@ -77,6 +84,7 @@ struct nfs_open_context { unsigned long flags; #define NFS_CONTEXT_ERROR_WRITE (0) #define NFS_CONTEXT_RESEND_WRITES (1) +#define NFS_CONTEXT_BAD (2) int error; struct list_head list; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 6c6ed153a9b4..3b7fa2abecca 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -40,6 +40,7 @@ struct nfs_client { #define NFS_CS_NORESVPORT 0 /* - use ephemeral src port */ #define NFS_CS_DISCRTRY 1 /* - disconnect on RPC retry */ #define NFS_CS_MIGRATION 2 /* - transparent state migr */ +#define NFS_CS_INFINITE_SLOTS 3 /* - don't limit TCP slots */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ @@ -197,5 +198,7 @@ struct nfs_server { #define NFS_CAP_MTIME (1U << 13) #define NFS_CAP_POSIX_LOCK (1U << 14) #define NFS_CAP_UIDGID_NOMAP (1U << 15) +#define NFS_CAP_STATEID_NFSV41 (1U << 16) +#define NFS_CAP_ATOMIC_OPEN_V1 (1U << 17) #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 4b993d358dad..bdc100f66dfb 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -349,6 +349,7 @@ struct nfs_openargs { const u32 * bitmask; const u32 * open_bitmap; __u32 claim; + enum createmode4 createmode; }; struct nfs_openres { @@ -486,6 +487,7 @@ struct nfs_readargs { struct nfs_fh * fh; struct nfs_open_context 
*context; struct nfs_lock_context *lock_context; + nfs4_stateid stateid; __u64 offset; __u32 count; unsigned int pgbase; @@ -507,6 +509,7 @@ struct nfs_writeargs { struct nfs_fh * fh; struct nfs_open_context *context; struct nfs_lock_context *lock_context; + nfs4_stateid stateid; __u64 offset; __u32 count; enum nfs3_stable_how stable; diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 2cf4ffaa3cd4..e7d492ce7c18 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -124,6 +124,7 @@ struct rpc_create_args { #define RPC_CLNT_CREATE_NOPING (1UL << 4) #define RPC_CLNT_CREATE_DISCRTRY (1UL << 5) #define RPC_CLNT_CREATE_QUIET (1UL << 6) +#define RPC_CLNT_CREATE_INFINITE_SLOTS (1UL << 7) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 30834be03011..ff5392421cb2 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -255,6 +255,8 @@ static inline int bc_prealloc(struct rpc_rqst *req) } #endif /* CONFIG_SUNRPC_BACKCHANNEL */ +#define XPRT_CREATE_INFINITE_SLOTS (1U) + struct xprt_create { int ident; /* XPRT_TRANSPORT identifier */ struct net * net; @@ -263,6 +265,7 @@ struct xprt_create { size_t addrlen; const char *servername; struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ + unsigned int flags; }; struct xprt_class { @@ -279,6 +282,7 @@ struct xprt_class { struct rpc_xprt *xprt_create_transport(struct xprt_create *args); void xprt_connect(struct rpc_task *task); void xprt_reserve(struct rpc_task *task); +void xprt_retry_reserve(struct rpc_task *task); int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task); int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); @@ -334,6 +338,7 @@ int xs_swapper(struct rpc_xprt *xprt, int enable); #define XPRT_CLOSING (6) 
#define XPRT_CONNECTION_ABORT (7) #define XPRT_CONNECTION_CLOSE (8) +#define XPRT_CONGESTED (9) static inline void xprt_set_connected(struct rpc_xprt *xprt) { diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 516fe2caac2c..262caf03bd5f 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -24,7 +24,6 @@ config SUNRPC_XPRT_RDMA config SUNRPC_SWAP bool depends on SUNRPC - select NETVM config RPCSEC_GSS_KRB5 tristate "Secure RPC: Kerberos V mechanism" diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index dcc446e7fbf6..651245aa829a 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -414,6 +414,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) }; char servername[48]; + if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS) + xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS; /* * If the caller chooses not to specify a hostname, whip * up a string representation of the passed-in address. @@ -1306,6 +1308,8 @@ call_reserve(struct rpc_task *task) xprt_reserve(task); } +static void call_retry_reserve(struct rpc_task *task); + /* * 1b. Grok the result of xprt_reserve() */ @@ -1347,7 +1351,7 @@ call_reserveresult(struct rpc_task *task) case -ENOMEM: rpc_delay(task, HZ >> 2); case -EAGAIN: /* woken up; retry */ - task->tk_action = call_reserve; + task->tk_action = call_retry_reserve; return; case -EIO: /* probably a shutdown */ break; @@ -1359,6 +1363,19 @@ call_reserveresult(struct rpc_task *task) rpc_exit(task, status); } +/* + * 1c. Retry reserving an RPC call slot + */ +static void +call_retry_reserve(struct rpc_task *task) +{ + dprint_status(task); + + task->tk_status = 0; + task->tk_action = call_reserveresult; + xprt_retry_reserve(task); +} + /* * 2. 
Bind and/or refresh the credentials */ @@ -1644,22 +1661,26 @@ call_connect_status(struct rpc_task *task) dprint_status(task); - task->tk_status = 0; - if (status >= 0 || status == -EAGAIN) { - clnt->cl_stats->netreconn++; - task->tk_action = call_transmit; - return; - } - trace_rpc_connect_status(task, status); switch (status) { /* if soft mounted, test if we've timed out */ case -ETIMEDOUT: task->tk_action = call_timeout; - break; - default: - rpc_exit(task, -EIO); + return; + case -ECONNREFUSED: + case -ECONNRESET: + case -ENETUNREACH: + if (RPC_IS_SOFTCONN(task)) + break; + /* retry with existing socket, after a delay */ + case 0: + case -EAGAIN: + task->tk_status = 0; + clnt->cl_stats->netreconn++; + task->tk_action = call_transmit; + return; } + rpc_exit(task, status); } /* diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index fb20f25ddec9..f8529fc8e542 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -180,6 +180,8 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); task->tk_waitqueue = queue; queue->qlen++; + /* barrier matches the read in rpc_wake_up_task_queue_locked() */ + smp_wmb(); rpc_set_queued(task); dprintk("RPC: %5u added to queue %p \"%s\"\n", @@ -430,8 +432,11 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task */ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task) { - if (RPC_IS_QUEUED(task) && task->tk_waitqueue == queue) - __rpc_do_wake_up_task(queue, task); + if (RPC_IS_QUEUED(task)) { + smp_rmb(); + if (task->tk_waitqueue == queue) + __rpc_do_wake_up_task(queue, task); + } } /* diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index b7478d5e7ffd..745fca3cfd36 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -948,6 +948,34 @@ void xprt_transmit(struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); } +static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task 
*task) +{ + set_bit(XPRT_CONGESTED, &xprt->state); + rpc_sleep_on(&xprt->backlog, task, NULL); +} + +static void xprt_wake_up_backlog(struct rpc_xprt *xprt) +{ + if (rpc_wake_up_next(&xprt->backlog) == NULL) + clear_bit(XPRT_CONGESTED, &xprt->state); +} + +static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task) +{ + bool ret = false; + + if (!test_bit(XPRT_CONGESTED, &xprt->state)) + goto out; + spin_lock(&xprt->reserve_lock); + if (test_bit(XPRT_CONGESTED, &xprt->state)) { + rpc_sleep_on(&xprt->backlog, task, NULL); + ret = true; + } + spin_unlock(&xprt->reserve_lock); +out: + return ret; +} + static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags) { struct rpc_rqst *req = ERR_PTR(-EAGAIN); @@ -992,7 +1020,7 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) task->tk_status = -ENOMEM; break; case -EAGAIN: - rpc_sleep_on(&xprt->backlog, task, NULL); + xprt_add_backlog(xprt, task); dprintk("RPC: waiting for request slot\n"); default: task->tk_status = -EAGAIN; @@ -1028,7 +1056,7 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) memset(req, 0, sizeof(*req)); /* mark unused */ list_add(&req->rq_list, &xprt->free); } - rpc_wake_up_next(&xprt->backlog); + xprt_wake_up_backlog(xprt); spin_unlock(&xprt->reserve_lock); } @@ -1092,13 +1120,40 @@ EXPORT_SYMBOL_GPL(xprt_free); * xprt_reserve - allocate an RPC request slot * @task: RPC task requesting a slot allocation * - * If no more slots are available, place the task on the transport's + * If the transport is marked as being congested, or if no more + * slots are available, place the task on the transport's * backlog queue. 
*/ void xprt_reserve(struct rpc_task *task) { struct rpc_xprt *xprt; + task->tk_status = 0; + if (task->tk_rqstp != NULL) + return; + + task->tk_timeout = 0; + task->tk_status = -EAGAIN; + rcu_read_lock(); + xprt = rcu_dereference(task->tk_client->cl_xprt); + if (!xprt_throttle_congested(xprt, task)) + xprt->ops->alloc_slot(xprt, task); + rcu_read_unlock(); +} + +/** + * xprt_retry_reserve - allocate an RPC request slot + * @task: RPC task requesting a slot allocation + * + * If no more slots are available, place the task on the transport's + * backlog queue. + * Note that the only difference with xprt_reserve is that we now + * ignore the value of the XPRT_CONGESTED flag. + */ +void xprt_retry_reserve(struct rpc_task *task) +{ + struct rpc_xprt *xprt; + task->tk_status = 0; if (task->tk_rqstp != NULL) return; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3d02130828da..9c2825827dec 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2207,10 +2207,6 @@ static void xs_tcp_setup_socket(struct work_struct *work) */ xs_tcp_force_close(xprt); break; - case -ECONNREFUSED: - case -ECONNRESET: - case -ENETUNREACH: - /* retry with existing socket, after a delay */ case 0: case -EINPROGRESS: case -EALREADY: @@ -2221,6 +2217,10 @@ static void xs_tcp_setup_socket(struct work_struct *work) /* Happens, for instance, if the user specified a link * local IPv6 address without a scope-id. 
*/ + case -ECONNREFUSED: + case -ECONNRESET: + case -ENETUNREACH: + /* retry with existing socket, after a delay */ goto out; } out_eagain: @@ -2767,9 +2767,13 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) struct rpc_xprt *xprt; struct sock_xprt *transport; struct rpc_xprt *ret; + unsigned int max_slot_table_size = xprt_max_tcp_slot_table_entries; + + if (args->flags & XPRT_CREATE_INFINITE_SLOTS) + max_slot_table_size = RPC_MAX_SLOT_TABLE_LIMIT; xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, - xprt_max_tcp_slot_table_entries); + max_slot_table_size); if (IS_ERR(xprt)) return xprt; transport = container_of(xprt, struct sock_xprt, xprt);