From c1326210477ecc06c53221f0005c64419aba30d6 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:39:20 +0000 Subject: [PATCH 01/13] nfs,nfsd: NFSv4.2 extended attribute protocol definitions Add definitions for the new operations, errors and flags as defined in RFC 8276 (File System Extended Attributes in NFSv4). Signed-off-by: Frank van der Linden Signed-off-by: Chuck Lever --- include/linux/nfs4.h | 20 ++++++++++++++++++++ include/uapi/linux/nfs4.h | 3 +++ 2 files changed, 23 insertions(+) diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 4dba3c948932..e6ca9d1d2e76 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -150,6 +150,12 @@ enum nfs_opnum4 { OP_WRITE_SAME = 70, OP_CLONE = 71, + /* xattr support (RFC8726) */ + OP_GETXATTR = 72, + OP_SETXATTR = 73, + OP_LISTXATTRS = 74, + OP_REMOVEXATTR = 75, + OP_ILLEGAL = 10044, }; @@ -280,6 +286,10 @@ enum nfsstat4 { NFS4ERR_WRONG_LFS = 10092, NFS4ERR_BADLABEL = 10093, NFS4ERR_OFFLOAD_NO_REQS = 10094, + + /* xattr (RFC8276) */ + NFS4ERR_NOXATTR = 10095, + NFS4ERR_XATTR2BIG = 10096, }; static inline bool seqid_mutating_err(u32 err) @@ -452,6 +462,7 @@ enum change_attr_type4 { #define FATTR4_WORD2_CHANGE_ATTR_TYPE (1UL << 15) #define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) #define FATTR4_WORD2_MODE_UMASK (1UL << 17) +#define FATTR4_WORD2_XATTR_SUPPORT (1UL << 18) /* MDS threshold bitmap bits */ #define THRESHOLD_RD (1UL << 0) @@ -700,4 +711,13 @@ struct nl4_server { struct nfs42_netaddr nl4_addr; /* NL4_NETADDR */ } u; }; + +/* + * Options for setxattr. These match the flags for setxattr(2). + */ +enum nfs4_setxattr_options { + SETXATTR4_EITHER = 0, + SETXATTR4_CREATE = 1, + SETXATTR4_REPLACE = 2, +}; #endif diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index 8572930cf5b0..bf197e99b98f 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -33,6 +33,9 @@ #define NFS4_ACCESS_EXTEND 0x0008 #define NFS4_ACCESS_DELETE 0x0010 #define NFS4_ACCESS_EXECUTE 0x0020 +#define NFS4_ACCESS_XAREAD 0x0040 +#define NFS4_ACCESS_XAWRITE 0x0080 +#define NFS4_ACCESS_XALIST 0x0100 #define NFS4_FH_PERSISTENT 0x0000 #define NFS4_FH_NOEXPIRE_WITH_OPEN 0x0001 From 9a67fcc8f3fd1e294922f28f20003c31d7f6cfeb Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:38:53 +0000 Subject: [PATCH 02/13] nfs: add client side only definitions for user xattrs Add client-side only definitions for user extended attributes (RFC8276). These are the access bits as used by the client code, and the CLNT procedure number definition. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- include/linux/nfs4.h | 5 +++++ include/linux/nfs_fs.h | 3 +++ 2 files changed, 8 insertions(+) diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index e6ca9d1d2e76..db13026ac7d1 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -553,6 +553,11 @@ enum { NFSPROC4_CLNT_LAYOUTERROR, NFSPROC4_CLNT_COPY_NOTIFY, + + NFSPROC4_CLNT_GETXATTR, + NFSPROC4_CLNT_SETXATTR, + NFSPROC4_CLNT_LISTXATTRS, + NFSPROC4_CLNT_REMOVEXATTR, }; /* nfs41 types */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 6ee9119acc5d..b743988fcbd0 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -212,6 +212,9 @@ struct nfs4_copy_state { #define NFS_ACCESS_EXTEND 0x0008 #define NFS_ACCESS_DELETE 0x0010 #define NFS_ACCESS_EXECUTE 0x0020 +#define NFS_ACCESS_XAREAD 0x0040 +#define NFS_ACCESS_XAWRITE 0x0080 +#define NFS_ACCESS_XALIST 0x0100 /* * Cache validity bit flags From 04a5da690e8f2da23c2ac940f2921e3aa622db82 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:38:54 +0000 Subject: [PATCH 03/13] NFSv4.2: define limits and sizes for user xattr handling Set limits for extended attributes (attribute value size and listxattr buffer size), based on the fs-independent limits (XATTR_*_MAX). Define the maximum XDR sizes for the RFC 8276 XATTR operations. In the case of operations that carry a larger payload (SETXATTR, GETXATTR, LISTXATTR), these exclude that payload, which is added as separate pages, like other operations do. Define, much like for read and write operations, the maximum overhead sizes for get/set/listxattr, and use them to limit the maximum payload size for those operations, in combination with the channel attributes. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 19 ++++++++-- fs/nfs/nfs42.h | 16 +++++++++ fs/nfs/nfs42xdr.c | 74 +++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4_fs.h | 6 ++++ fs/nfs/nfs4client.c | 31 ++++++++++++++++ include/linux/nfs_fs_sb.h | 5 +++ 6 files changed, 149 insertions(+), 2 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index f1ff3076e4a4..055040bf1a8e 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -50,6 +50,7 @@ #include "nfs.h" #include "netns.h" #include "sysfs.h" +#include "nfs42.h" #define NFSDBG_FACILITY NFSDBG_CLIENT @@ -749,7 +750,7 @@ static int nfs_init_server(struct nfs_server *server, static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo) { - unsigned long max_rpc_payload; + unsigned long max_rpc_payload, raw_max_rpc_payload; /* Work out a lot of parameters */ if (server->rsize == 0) @@ -762,7 +763,9 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax) server->wsize = nfs_block_size(fsinfo->wtmax, NULL); - max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); + raw_max_rpc_payload = rpc_max_payload(server->client); + max_rpc_payload = nfs_block_size(raw_max_rpc_payload, NULL); + if (server->rsize > max_rpc_payload) server->rsize = max_rpc_payload; if (server->rsize > NFS_MAX_FILE_IO_SIZE) @@ -795,6 +798,18 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, server->clone_blksize = fsinfo->clone_blksize; /* We're airborne Set socket buffersize */ rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); + +#ifdef CONFIG_NFS_V4_2 + /* + * Defaults until limited by the session parameters. + */ + server->gxasize = min_t(unsigned int, raw_max_rpc_payload, + XATTR_SIZE_MAX); + server->sxasize = min_t(unsigned int, raw_max_rpc_payload, + XATTR_SIZE_MAX); + server->lxasize = min_t(unsigned int, raw_max_rpc_payload, + nfs42_listxattr_xdrsize(XATTR_LIST_MAX)); +#endif } /* diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index c891af949886..51de8ddc7d88 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -6,6 +6,8 @@ #ifndef __LINUX_FS_NFS_NFS4_2_H #define __LINUX_FS_NFS_NFS4_2_H +#include + /* * FIXME: four LAYOUTSTATS calls per compound at most! Do we need to support * more? Need to consider not to pre-alloc too much for a compound. @@ -36,5 +38,19 @@ static inline bool nfs42_files_from_same_server(struct file *in, return nfs4_check_serverowner_major_id(c_in->cl_serverowner, c_out->cl_serverowner); } + +/* + * Maximum XDR buffer size needed for a listxattr buffer of buflen size. + * + * The upper boundary is a buffer with all 1-byte sized attribute names. + * They would be 7 bytes long in the eventual buffer ("user.x\0"), and + * 8 bytes long XDR-encoded. + * + * Include the trailing eof word as well. + */ +static inline u32 nfs42_listxattr_xdrsize(u32 buflen) +{ + return ((buflen / (XATTR_USER_PREFIX_LEN + 2)) * 8) + 4; +} #endif /* CONFIG_NFS_V4_2 */ #endif /* __LINUX_FS_NFS_NFS4_2_H */ diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index c03f3246d6c5..6712daa9d85b 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -169,6 +169,80 @@ decode_clone_maxsz + \ decode_getattr_maxsz) +#ifdef CONFIG_NFS_V4_2 +/* Not limited by NFS itself, limited by the generic xattr code */ +#define nfs4_xattr_name_maxsz XDR_QUADLEN(XATTR_NAME_MAX) + +#define encode_getxattr_maxsz (op_encode_hdr_maxsz + 1 + \ + nfs4_xattr_name_maxsz) +#define decode_getxattr_maxsz (op_decode_hdr_maxsz + 1 + 1) +#define encode_setxattr_maxsz (op_encode_hdr_maxsz + \ + 1 + nfs4_xattr_name_maxsz + 1) +#define decode_setxattr_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz) +#define encode_listxattrs_maxsz (op_encode_hdr_maxsz + 2 + 1) +#define decode_listxattrs_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1) +#define encode_removexattr_maxsz (op_encode_hdr_maxsz + 1 + \ + nfs4_xattr_name_maxsz) +#define decode_removexattr_maxsz (op_decode_hdr_maxsz + \ + decode_change_info_maxsz) + +#define NFS4_enc_getxattr_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_getxattr_maxsz) +#define NFS4_dec_getxattr_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_getxattr_maxsz) +#define NFS4_enc_setxattr_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_setxattr_maxsz) +#define NFS4_dec_setxattr_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_setxattr_maxsz) +#define NFS4_enc_listxattrs_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_listxattrs_maxsz) +#define NFS4_dec_listxattrs_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_listxattrs_maxsz) +#define NFS4_enc_removexattr_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_removexattr_maxsz) +#define NFS4_dec_removexattr_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_removexattr_maxsz) + +/* + * These values specify the maximum amount of data that is not + * associated with the extended attribute name or extended + * attribute list in the SETXATTR, GETXATTR and LISTXATTR + * respectively. + */ +const u32 nfs42_maxsetxattr_overhead = ((RPC_MAX_HEADER_WITH_AUTH + + compound_encode_hdr_maxsz + + encode_sequence_maxsz + + encode_putfh_maxsz + 1 + + nfs4_xattr_name_maxsz) + * XDR_UNIT); + +const u32 nfs42_maxgetxattr_overhead = ((RPC_MAX_HEADER_WITH_AUTH + + compound_decode_hdr_maxsz + + decode_sequence_maxsz + + decode_putfh_maxsz + 1) * XDR_UNIT); + +const u32 nfs42_maxlistxattrs_overhead = ((RPC_MAX_HEADER_WITH_AUTH + + compound_decode_hdr_maxsz + + decode_sequence_maxsz + + decode_putfh_maxsz + 3) * XDR_UNIT); +#endif + static void encode_fallocate(struct xdr_stream *xdr, const struct nfs42_falloc_args *args) { diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 2b7f6dcd2eb8..526b3e70d57c 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -557,6 +557,12 @@ static inline void nfs4_unregister_sysctl(void) /* nfs4xdr.c */ extern const struct rpc_procinfo nfs4_procedures[]; +#ifdef CONFIG_NFS_V4_2 +extern const u32 nfs42_maxsetxattr_overhead; +extern const u32 nfs42_maxgetxattr_overhead; +extern const u32 nfs42_maxlistxattrs_overhead; +#endif + struct nfs4_mount_data; /* callback_xdr.c */ diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 0bd77cc1f639..c41cbd86612c 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -992,6 +992,36 @@ static void nfs4_session_limit_rwsize(struct nfs_server *server) #endif /* CONFIG_NFS_V4_1 */ } +/* + * Limit xattr sizes using the channel attributes. + */ +static void nfs4_session_limit_xasize(struct nfs_server *server) +{ +#ifdef CONFIG_NFS_V4_2 + struct nfs4_session *sess; + u32 server_gxa_sz; + u32 server_sxa_sz; + u32 server_lxa_sz; + + if (!nfs4_has_session(server->nfs_client)) + return; + + sess = server->nfs_client->cl_session; + + server_gxa_sz = sess->fc_attrs.max_resp_sz - nfs42_maxgetxattr_overhead; + server_sxa_sz = sess->fc_attrs.max_rqst_sz - nfs42_maxsetxattr_overhead; + server_lxa_sz = sess->fc_attrs.max_resp_sz - + nfs42_maxlistxattrs_overhead; + + if (server->gxasize > server_gxa_sz) + server->gxasize = server_gxa_sz; + if (server->sxasize > server_sxa_sz) + server->sxasize = server_sxa_sz; + if (server->lxasize > server_lxa_sz) + server->lxasize = server_lxa_sz; +#endif +} + static int nfs4_server_common_setup(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_probe) { @@ -1039,6 +1069,7 @@ static int nfs4_server_common_setup(struct nfs_server *server, goto out; nfs4_session_limit_rwsize(server); + nfs4_session_limit_xasize(server); if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) server->namelen = NFS4_MAXNAMLEN; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 465fa98258a3..128e01acb4ca 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -163,6 +163,11 @@ struct nfs_server { unsigned int dtsize; /* readdir size */ unsigned short port; /* "port=" setting */ unsigned int bsize; /* server block size */ +#ifdef CONFIG_NFS_V4_2 + unsigned int gxasize; /* getxattr size */ + unsigned int sxasize; /* setxattr size */ + unsigned int lxasize; /* listxattr size */ +#endif unsigned int acregmin; /* attr cache timeouts */ unsigned int acregmax; unsigned int acdirmin; From b78ef845c35dbae25e57b598901a65b13d940c81 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:38:55 +0000 Subject: [PATCH 04/13] NFSv4.2: query the server for extended attribute support Query the server for extended attribute support, and record it as the NFS_CAP_XATTR flag in the server capabilities. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 3 +++ fs/nfs/nfs4proc.c | 3 ++- fs/nfs/nfs4xdr.c | 25 +++++++++++++++++++++++++ include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 1 + 5 files changed, 32 insertions(+), 1 deletion(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 055040bf1a8e..4b8cc93913f7 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -809,6 +809,9 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, XATTR_SIZE_MAX); server->lxasize = min_t(unsigned int, raw_max_rpc_payload, nfs42_listxattr_xdrsize(XATTR_LIST_MAX)); + + if (fsinfo->xattr_support) + server->caps |= NFS_CAP_XATTR; #endif } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e32717fd1169..64e081459327 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -256,6 +256,7 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE | FATTR4_WORD1_FS_LAYOUT_TYPES, FATTR4_WORD2_LAYOUT_BLKSIZE | FATTR4_WORD2_CLONE_BLKSIZE + | FATTR4_WORD2_XATTR_SUPPORT }; const u32 nfs4_fs_locations_bitmap[3] = { @@ -3740,7 +3741,7 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) #define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL) #define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL) -#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_MODE_UMASK - 1UL) +#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_XATTR_SUPPORT - 1UL) static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 47817ef0aadb..9e1b07640e9a 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4201,6 +4201,26 @@ static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, str return status; } +static int decode_attr_xattrsupport(struct xdr_stream *xdr, uint32_t *bitmap, + uint32_t *res) +{ + __be32 *p; + + *res = 0; + if (unlikely(bitmap[2] & (FATTR4_WORD2_XATTR_SUPPORT - 1U))) + return -EIO; + if (likely(bitmap[2] & FATTR4_WORD2_XATTR_SUPPORT)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + *res = be32_to_cpup(p); + bitmap[2] &= ~FATTR4_WORD2_XATTR_SUPPORT; + } + dprintk("%s: XATTR support=%s\n", __func__, + *res == 0 ? "false" : "true"); + return 0; +} + static int verify_attr_len(struct xdr_stream *xdr, unsigned int savep, uint32_t attrlen) { unsigned int attrwords = XDR_QUADLEN(attrlen); @@ -4855,6 +4875,11 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) if (status) goto xdr_error; + status = decode_attr_xattrsupport(xdr, bitmap, + &fsinfo->xattr_support); + if (status) + goto xdr_error; + status = verify_attr_len(xdr, savep, attrlen); xdr_error: dprintk("%s: xdr returned %d!\n", __func__, -status); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 128e01acb4ca..7eae72a8762e 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -286,5 +286,6 @@ struct nfs_server { #define NFS_CAP_OFFLOAD_CANCEL (1U << 25) #define NFS_CAP_LAYOUTERROR (1U << 26) #define NFS_CAP_COPY_NOTIFY (1U << 27) +#define NFS_CAP_XATTR (1U << 28) #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 5fd0a9ef425f..dee9b1cfa972 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -150,6 +150,7 @@ struct nfs_fsinfo { __u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */ __u32 blksize; /* preferred pnfs io block size */ __u32 clone_blksize; /* granularity of a CLONE operation */ + __u32 xattr_support; /* User xattrs supported */ }; struct nfs_fsstat { From 3e1f02123fba086d32dfd5729e6f4e2b54654acc Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:38:56 +0000 Subject: [PATCH 05/13] NFSv4.2: add client side XDR handling for extended attributes Define the argument and response structures that will be used for RFC 8276 extended attribute RPC calls, and implement the necessary functions to encode/decode the extended attribute operations. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xdr.c | 368 +++++++++++++++++++++++++++++++++++++++- fs/nfs/nfs4xdr.c | 6 + include/linux/nfs_xdr.h | 59 ++++++- 3 files changed, 430 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 6712daa9d85b..cc50085e151c 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -169,7 +169,6 @@ decode_clone_maxsz + \ decode_getattr_maxsz) -#ifdef CONFIG_NFS_V4_2 /* Not limited by NFS itself, limited by the generic xattr code */ #define nfs4_xattr_name_maxsz XDR_QUADLEN(XATTR_NAME_MAX) @@ -241,7 +240,6 @@ const u32 nfs42_maxlistxattrs_overhead = ((RPC_MAX_HEADER_WITH_AUTH + compound_decode_hdr_maxsz + decode_sequence_maxsz + decode_putfh_maxsz + 3) * XDR_UNIT); -#endif static void encode_fallocate(struct xdr_stream *xdr, const struct nfs42_falloc_args *args) @@ -407,6 +405,210 @@ static void encode_layouterror(struct xdr_stream *xdr, encode_device_error(xdr, &args->errors[0]); } +static void encode_setxattr(struct xdr_stream *xdr, + const struct nfs42_setxattrargs *arg, + struct compound_hdr *hdr) +{ + __be32 *p; + + BUILD_BUG_ON(XATTR_CREATE != SETXATTR4_CREATE); + BUILD_BUG_ON(XATTR_REPLACE != SETXATTR4_REPLACE); + + encode_op_hdr(xdr, OP_SETXATTR, decode_setxattr_maxsz, hdr); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(arg->xattr_flags); + encode_string(xdr, strlen(arg->xattr_name), arg->xattr_name); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(arg->xattr_len); + if (arg->xattr_len) + xdr_write_pages(xdr, arg->xattr_pages, 0, arg->xattr_len); +} + +static int decode_setxattr(struct xdr_stream *xdr, + struct nfs4_change_info *cinfo) +{ + int status; + + status = decode_op_hdr(xdr, OP_SETXATTR); + if (status) + goto out; + status = decode_change_info(xdr, cinfo); +out: + return status; +} + + +static void encode_getxattr(struct xdr_stream *xdr, const char *name, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_GETXATTR, decode_getxattr_maxsz, hdr); + encode_string(xdr, strlen(name), name); +} + +static int decode_getxattr(struct xdr_stream *xdr, + struct nfs42_getxattrres *res, + struct rpc_rqst *req) +{ + int status; + __be32 *p; + u32 len, rdlen; + + status = decode_op_hdr(xdr, OP_GETXATTR); + if (status) + return status; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + + len = be32_to_cpup(p); + if (len > req->rq_rcv_buf.page_len) + return -ERANGE; + + res->xattr_len = len; + + if (len > 0) { + rdlen = xdr_read_pages(xdr, len); + if (rdlen < len) + return -EIO; + } + + return 0; +} + +static void encode_removexattr(struct xdr_stream *xdr, const char *name, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_REMOVEXATTR, decode_removexattr_maxsz, hdr); + encode_string(xdr, strlen(name), name); +} + + +static int decode_removexattr(struct xdr_stream *xdr, + struct nfs4_change_info *cinfo) +{ + int status; + + status = decode_op_hdr(xdr, OP_REMOVEXATTR); + if (status) + goto out; + + status = decode_change_info(xdr, cinfo); +out: + return status; +} + +static void encode_listxattrs(struct xdr_stream *xdr, + const struct nfs42_listxattrsargs *arg, + struct compound_hdr *hdr) +{ + __be32 *p; + + encode_op_hdr(xdr, OP_LISTXATTRS, decode_listxattrs_maxsz + 1, hdr); + + p = reserve_space(xdr, 12); + if (unlikely(!p)) + return; + + p = xdr_encode_hyper(p, arg->cookie); + /* + * RFC 8276 says to specify the full max length of the LISTXATTRS + * XDR reply. Count is set to the XDR length of the names array + * plus the EOF marker. So, add the cookie and the names count. + */ + *p = cpu_to_be32(arg->count + 8 + 4); +} + +static int decode_listxattrs(struct xdr_stream *xdr, + struct nfs42_listxattrsres *res) +{ + int status; + __be32 *p; + u32 count, len, ulen; + size_t left, copied; + char *buf; + + status = decode_op_hdr(xdr, OP_LISTXATTRS); + if (status) { + /* + * Special case: for LISTXATTRS, NFS4ERR_TOOSMALL + * should be translated to ERANGE. + */ + if (status == -ETOOSMALL) + status = -ERANGE; + goto out; + } + + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + return -EIO; + + xdr_decode_hyper(p, &res->cookie); + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + + left = res->xattr_len; + buf = res->xattr_buf; + + count = be32_to_cpup(p); + copied = 0; + + /* + * We have asked for enough room to encode the maximum number + * of possible attribute names, so everything should fit. + * + * But, don't rely on that assumption. Just decode entries + * until they don't fit anymore, just in case the server did + * something odd. + */ + while (count--) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + + len = be32_to_cpup(p); + if (len > (XATTR_NAME_MAX - XATTR_USER_PREFIX_LEN)) { + status = -ERANGE; + goto out; + } + + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + return -EIO; + + ulen = len + XATTR_USER_PREFIX_LEN + 1; + if (buf) { + if (ulen > left) { + status = -ERANGE; + goto out; + } + + memcpy(buf, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); + memcpy(buf + XATTR_USER_PREFIX_LEN, p, len); + + buf[ulen - 1] = 0; + buf += ulen; + left -= ulen; + } + copied += ulen; + } + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + + res->eof = be32_to_cpup(p); + res->copied = copied; + +out: + if (status == -ERANGE && res->xattr_len == XATTR_LIST_MAX) + status = -E2BIG; + + return status; +} + /* * Encode ALLOCATE request */ @@ -1062,4 +1264,166 @@ static int nfs4_xdr_dec_layouterror(struct rpc_rqst *rqstp, return status; } +#ifdef CONFIG_NFS_V4_2 +static void nfs4_xdr_enc_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr, + const void *data) +{ + const struct nfs42_setxattrargs *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_setxattr(xdr, args, &hdr); + encode_nops(&hdr); +} + +static int nfs4_xdr_dec_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr, + void *data) +{ + struct nfs42_setxattrres *res = data; + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, req); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + + status = decode_setxattr(xdr, &res->cinfo); +out: + return status; +} + +static void nfs4_xdr_enc_getxattr(struct rpc_rqst *req, struct xdr_stream *xdr, + const void *data) +{ + const struct nfs42_getxattrargs *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + size_t plen; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_getxattr(xdr, args->xattr_name, &hdr); + + plen = args->xattr_len ? args->xattr_len : XATTR_SIZE_MAX; + + rpc_prepare_reply_pages(req, args->xattr_pages, 0, plen, + hdr.replen); + req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES; + + encode_nops(&hdr); +} + +static int nfs4_xdr_dec_getxattr(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, void *data) +{ + struct nfs42_getxattrres *res = data; + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_getxattr(xdr, res, rqstp); +out: + return status; +} + +static void nfs4_xdr_enc_listxattrs(struct rpc_rqst *req, + struct xdr_stream *xdr, const void *data) +{ + const struct nfs42_listxattrsargs *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_listxattrs(xdr, args, &hdr); + + rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count, + hdr.replen); + req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES; + + encode_nops(&hdr); +} + +static int nfs4_xdr_dec_listxattrs(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, void *data) +{ + struct nfs42_listxattrsres *res = data; + struct compound_hdr hdr; + int status; + + xdr_set_scratch_buffer(xdr, page_address(res->scratch), PAGE_SIZE); + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_listxattrs(xdr, res); +out: + return status; +} + +static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req, + struct xdr_stream *xdr, const void *data) +{ + const struct nfs42_removexattrargs *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_removexattr(xdr, args->xattr_name, &hdr); + encode_nops(&hdr); +} + +static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req, + struct xdr_stream *xdr, void *data) +{ + struct nfs42_removexattrres *res = data; + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, req); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + + status = decode_removexattr(xdr, &res->cinfo); +out: + return status; +} +#endif #endif /* __LINUX_FS_NFS_NFS4_2XDR_H */ diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 9e1b07640e9a..388ac520b104 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7481,6 +7481,8 @@ static struct { { NFS4ERR_SYMLINK, -ELOOP }, { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP }, { NFS4ERR_DEADLOCK, -EDEADLK }, + { NFS4ERR_NOXATTR, -ENODATA }, + { NFS4ERR_XATTR2BIG, -E2BIG }, { -1, -EIO } }; @@ -7609,6 +7611,10 @@ const struct rpc_procinfo nfs4_procedures[] = { PROC42(COPY_NOTIFY, enc_copy_notify, dec_copy_notify), PROC(LOOKUPP, enc_lookupp, dec_lookupp), PROC42(LAYOUTERROR, enc_layouterror, dec_layouterror), + PROC42(GETXATTR, enc_getxattr, dec_getxattr), + PROC42(SETXATTR, enc_setxattr, dec_setxattr), + PROC42(LISTXATTRS, enc_listxattrs, dec_listxattrs), + PROC42(REMOVEXATTR, enc_removexattr, dec_removexattr), }; static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)]; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index dee9b1cfa972..9408f3252c8e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1498,7 +1498,64 @@ struct nfs42_seek_res { u32 sr_eof; u64 sr_offset; }; -#endif + +struct nfs42_setxattrargs { + struct nfs4_sequence_args seq_args; + struct nfs_fh *fh; + const char *xattr_name; + u32 xattr_flags; + size_t xattr_len; + struct page **xattr_pages; +}; + +struct nfs42_setxattrres { + struct nfs4_sequence_res seq_res; + struct nfs4_change_info cinfo; +}; + +struct nfs42_getxattrargs { + struct nfs4_sequence_args seq_args; + struct nfs_fh *fh; + const char *xattr_name; + size_t xattr_len; + struct page **xattr_pages; +}; + +struct nfs42_getxattrres { + struct nfs4_sequence_res seq_res; + size_t xattr_len; +}; + +struct nfs42_listxattrsargs { + struct nfs4_sequence_args seq_args; + struct nfs_fh *fh; + u32 count; + u64 cookie; + struct page **xattr_pages; +}; + +struct nfs42_listxattrsres { + struct nfs4_sequence_res seq_res; + struct page *scratch; + void *xattr_buf; + size_t xattr_len; + u64 cookie; + bool eof; + size_t copied; +}; + +struct nfs42_removexattrargs { + struct nfs4_sequence_args seq_args; + struct nfs_fh *fh; + const char *xattr_name; +}; + +struct nfs42_removexattrres { + struct nfs4_sequence_res seq_res; + struct nfs4_change_info cinfo; +}; + +#endif /* CONFIG_NFS_V4_2 */ struct nfs_page; From d2ae4f8b21c111bb795c557588d89dccd005828d Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:38:57 +0000 Subject: [PATCH 06/13] nfs: define nfs_access_get_cached function The only consumer of nfs_access_get_cached_rcu and nfs_access_cached calls these static functions in order to first try RCU access, and then locked access. Combine them in to a single function, and call that. Make this function available to the rest of the NFS code. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 20 ++++++++++++++++---- include/linux/nfs_fs.h | 2 ++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5a331da5f55a..f04fc0f7843b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2460,7 +2460,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, co return NULL; } -static int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, bool may_block) +static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, bool may_block) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_access_entry *cache; @@ -2533,6 +2533,20 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre return err; } +int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct +nfs_access_entry *res, bool may_block) +{ + int status; + + status = nfs_access_get_cached_rcu(inode, cred, res); + if (status != 0) + status = nfs_access_get_cached_locked(inode, cred, res, + may_block); + + return status; +} +EXPORT_SYMBOL_GPL(nfs_access_get_cached); + static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set) { struct nfs_inode *nfsi = NFS_I(inode); @@ -2647,9 +2661,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask) trace_nfs_access_enter(inode); - status = nfs_access_get_cached_rcu(inode, cred, &cache); - if (status != 0) - status = nfs_access_get_cached(inode, cred, &cache, may_block); + status = nfs_access_get_cached(inode, cred, &cache, may_block); if (status == 0) goto out_cached; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b743988fcbd0..714b577dce19 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -493,6 +493,8 @@ extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr, struct nfs4_label *label); extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags); extern void nfs_access_zap_cache(struct inode *inode); +extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, + bool may_block); /* * linux/fs/nfs/symlink.c From 72832a2453d9ca752beedb3a4fb2fc82e375c46c Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:38:58 +0000 Subject: [PATCH 07/13] NFSv4.2: query the extended attribute access bits RFC 8276 defines separate ACCESS bits for extended attribute checking. Query them in nfs_do_access and opendata. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 4 ++++ fs/nfs/nfs4proc.c | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f04fc0f7843b..a12f42e7d8c7 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2673,6 +2673,10 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask) * Determine which access bits we want to ask for... */ cache.mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND; + if (nfs_server_capable(inode, NFS_CAP_XATTR)) { + cache.mask |= NFS_ACCESS_XAREAD | NFS_ACCESS_XAWRITE | + NFS_ACCESS_XALIST; + } if (S_ISDIR(inode->i_mode)) cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP; else diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 64e081459327..c88351f8b18d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1341,6 +1341,12 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_EXECUTE; +#ifdef CONFIG_NFS_V4_2 + if (server->caps & NFS_CAP_XATTR) + p->o_arg.access |= NFS4_ACCESS_XAREAD | + NFS4_ACCESS_XAWRITE | + NFS4_ACCESS_XALIST; +#endif } } p->o_arg.clientid = server->nfs_client->cl_clientid; From 1b523ca972edba2025e45080b08385928a29aa30 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:38:59 +0000 Subject: [PATCH 08/13] nfs: modify update_changeattr to deal with regular files Until now, change attributes in change_info form were only returned by directory operations. However, they are also used for the RFC 8276 extended attribute operations, which work on both directories and regular files. Modify update_changeattr to deal: * Rename it to nfs4_update_changeattr and make it non-static. * Don't always use INO_INVALID_DATA, this isn't needed for a directory that only had its extended attributes changed by us. * Existing callers now always pass in INO_INVALID_DATA. For the current callers of this function, behavior is unchanged. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 5 ++++ fs/nfs/nfs4proc.c | 70 +++++++++++++++++++++++++++++------------------ 2 files changed, 49 insertions(+), 26 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 526b3e70d57c..c16ec860b8b3 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -324,6 +324,11 @@ extern int update_open_stateid(struct nfs4_state *state, extern int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo); +extern void nfs4_update_changeattr(struct inode *dir, + struct nfs4_change_info *cinfo, + unsigned long timestamp, + unsigned long cache_validity); + #if defined(CONFIG_NFS_V4_1) extern int nfs41_sequence_done(struct rpc_task *, struct nfs4_sequence_res *); extern int nfs4_proc_create_session(struct nfs_client *, const struct cred *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c88351f8b18d..6540071cb228 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1158,37 +1158,48 @@ nfs4_dec_nlink_locked(struct inode *inode) } static void -update_changeattr_locked(struct inode *dir, struct nfs4_change_info *cinfo, +nfs4_update_changeattr_locked(struct inode *inode, + struct nfs4_change_info *cinfo, unsigned long timestamp, unsigned long cache_validity) { - struct nfs_inode *nfsi = NFS_I(dir); + struct nfs_inode *nfsi = NFS_I(inode); nfsi->cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME - | NFS_INO_INVALID_DATA | cache_validity; - if (cinfo->atomic && cinfo->before == inode_peek_iversion_raw(dir)) { + + if (cinfo->atomic && cinfo->before == inode_peek_iversion_raw(inode)) { nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; nfsi->attrtimeo_timestamp = jiffies; } else { - nfs_force_lookup_revalidate(dir); - if (cinfo->before != inode_peek_iversion_raw(dir)) + if (S_ISDIR(inode->i_mode)) { + nfsi->cache_validity |= NFS_INO_INVALID_DATA; + nfs_force_lookup_revalidate(inode); + } else { + if (!NFS_PROTO(inode)->have_delegation(inode, + FMODE_READ)) + nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; + } + + if (cinfo->before != inode_peek_iversion_raw(inode)) nfsi->cache_validity |= NFS_INO_INVALID_ACCESS | - NFS_INO_INVALID_ACL; + NFS_INO_INVALID_ACL; } - inode_set_iversion_raw(dir, cinfo->after); + inode_set_iversion_raw(inode, cinfo->after); nfsi->read_cache_jiffies = timestamp; nfsi->attr_gencount = nfs_inc_attr_generation_counter(); nfsi->cache_validity &= ~NFS_INO_INVALID_CHANGE; - nfs_fscache_invalidate(dir); + + if (nfsi->cache_validity & NFS_INO_INVALID_DATA) + nfs_fscache_invalidate(inode); } -static void -update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo, +void +nfs4_update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo, unsigned long timestamp, unsigned long cache_validity) { spin_lock(&dir->i_lock); - update_changeattr_locked(dir, cinfo, timestamp, cache_validity); + nfs4_update_changeattr_locked(dir, cinfo, timestamp, cache_validity); spin_unlock(&dir->i_lock); } @@ -2644,8 +2655,9 @@ static int _nfs4_proc_open(struct nfs4_opendata *data, data->file_created = true; if (data->file_created || inode_peek_iversion_raw(dir) != o_res->cinfo.after) - update_changeattr(dir, &o_res->cinfo, - o_res->f_attr->time_start, 0); + nfs4_update_changeattr(dir, &o_res->cinfo, + o_res->f_attr->time_start, + NFS_INO_INVALID_DATA); } if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0) server->caps &= ~NFS_CAP_POSIX_LOCK; @@ -4531,7 +4543,8 @@ _nfs4_proc_remove(struct inode *dir, const struct qstr *name, u32 ftype) status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); if (status == 0) { spin_lock(&dir->i_lock); - update_changeattr_locked(dir, &res.cinfo, timestamp, 0); + nfs4_update_changeattr_locked(dir, &res.cinfo, timestamp, + NFS_INO_INVALID_DATA); /* Removing a directory decrements nlink in the parent */ if (ftype == NF4DIR && dir->i_nlink > 2) nfs4_dec_nlink_locked(dir); @@ -4615,8 +4628,9 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) &data->timeout) == -EAGAIN) return 0; if (task->tk_status == 0) - update_changeattr(dir, &res->cinfo, - res->dir_attr->time_start, 0); + nfs4_update_changeattr(dir, &res->cinfo, + res->dir_attr->time_start, + NFS_INO_INVALID_DATA); return 1; } @@ -4660,16 +4674,18 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, if (task->tk_status == 0) { if (new_dir != old_dir) { /* Note: If we moved a directory, nlink will change */ - update_changeattr(old_dir, &res->old_cinfo, + nfs4_update_changeattr(old_dir, &res->old_cinfo, res->old_fattr->time_start, - NFS_INO_INVALID_OTHER); - update_changeattr(new_dir, &res->new_cinfo, + NFS_INO_INVALID_OTHER | + NFS_INO_INVALID_DATA); + nfs4_update_changeattr(new_dir, &res->new_cinfo, res->new_fattr->time_start, - NFS_INO_INVALID_OTHER); + NFS_INO_INVALID_OTHER | + NFS_INO_INVALID_DATA); } else - update_changeattr(old_dir, &res->old_cinfo, + nfs4_update_changeattr(old_dir, &res->old_cinfo, res->old_fattr->time_start, - 0); + NFS_INO_INVALID_DATA); } return 1; } @@ -4710,7 +4726,8 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (!status) { - update_changeattr(dir, &res.cinfo, res.fattr->time_start, 0); + nfs4_update_changeattr(dir, &res.cinfo, res.fattr->time_start, + NFS_INO_INVALID_DATA); status = nfs_post_op_update_inode(inode, res.fattr); if (!status) nfs_setsecurity(inode, res.fattr, res.label); @@ -4788,8 +4805,9 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_ &data->arg.seq_args, &data->res.seq_res, 1); if (status == 0) { spin_lock(&dir->i_lock); - update_changeattr_locked(dir, &data->res.dir_cinfo, - data->res.fattr->time_start, 0); + nfs4_update_changeattr_locked(dir, &data->res.dir_cinfo, + data->res.fattr->time_start, + NFS_INO_INVALID_DATA); /* Creating a directory bumps nlink in the parent */ if (data->arg.ftype == NF4DIR) nfs4_inc_nlink_locked(dir); From 0f44da51aeef9c974ea744c0d9e24d54eec4e94c Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:39:00 +0000 Subject: [PATCH 09/13] nfs: define and use the NFS_INO_INVALID_XATTR flag Define the NFS_INO_INVALID_XATTR flag, to be used for the NFSv4.2 xattr cache, and use it where appropriate. No functional change as yet. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 7 ++++++- fs/nfs/nfs4proc.c | 3 ++- fs/nfs/nfstrace.h | 3 ++- include/linux/nfs_fs.h | 1 + 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0bf1f835de01..629af798dfc9 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -204,7 +204,8 @@ static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) flags &= ~NFS_INO_INVALID_OTHER; flags &= ~(NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE - | NFS_INO_REVAL_PAGECACHE); + | NFS_INO_REVAL_PAGECACHE + | NFS_INO_INVALID_XATTR); } if (inode->i_mapping->nrpages == 0) @@ -542,6 +543,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st inode->i_gid = fattr->gid; else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); + if (nfs_server_capable(inode, NFS_CAP_XATTR)) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR); if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { @@ -1375,6 +1378,8 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode_set_iversion_raw(inode, fattr->change_attr); if (S_ISDIR(inode->i_mode)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); + else if (nfs_server_capable(inode, NFS_CAP_XATTR)) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR); } /* If we have atomic WCC data, we may update some attributes */ ts = inode->i_ctime; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6540071cb228..0d123fe0a423 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1183,7 +1183,8 @@ nfs4_update_changeattr_locked(struct inode *inode, if (cinfo->before != inode_peek_iversion_raw(inode)) nfsi->cache_validity |= NFS_INO_INVALID_ACCESS | - NFS_INO_INVALID_ACL; + NFS_INO_INVALID_ACL | + NFS_INO_INVALID_XATTR; } inode_set_iversion_raw(inode, cinfo->after); nfsi->read_cache_jiffies = timestamp; diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 547cec79899f..5a59dcdce0b2 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -59,7 +59,8 @@ TRACE_DEFINE_ENUM(NFS_INO_INVALID_OTHER); { NFS_INO_INVALID_CTIME, "INVALID_CTIME" }, \ { NFS_INO_INVALID_MTIME, "INVALID_MTIME" }, \ { NFS_INO_INVALID_SIZE, "INVALID_SIZE" }, \ - { NFS_INO_INVALID_OTHER, "INVALID_OTHER" }) + { NFS_INO_INVALID_OTHER, "INVALID_OTHER" }, \ + { NFS_INO_INVALID_XATTR, "INVALID_XATTR" }) TRACE_DEFINE_ENUM(NFS_INO_ADVISE_RDPLUS); TRACE_DEFINE_ENUM(NFS_INO_STALE); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 714b577dce19..943ee750d68c 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -234,6 +234,7 @@ struct nfs4_copy_state { #define NFS_INO_DATA_INVAL_DEFER \ BIT(13) /* Deferred cache invalidation */ #define NFS_INO_INVALID_BLOCKS BIT(14) /* cached blocks are invalid */ +#define NFS_INO_INVALID_XATTR BIT(15) /* xattrs are invalid */ #define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \ | NFS_INO_INVALID_CTIME \ From ccde1e9c0130b4182ae91adac3908f6f3277580a Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:39:01 +0000 Subject: [PATCH 10/13] nfs: make the buf_to_pages_noslab function available to the nfs code Make the buf_to_pages_noslab function available to the rest of the NFS code. Rename it to nfs4_buf_to_pages_noslab to be consistent. This will be used later in the NFSv4.2 xattr code. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 2 ++ fs/nfs/nfs4proc.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index c16ec860b8b3..2fa9e4ea98d2 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -328,6 +328,8 @@ extern void nfs4_update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo, unsigned long timestamp, unsigned long cache_validity); +extern int nfs4_buf_to_pages_noslab(const void *buf, size_t buflen, + struct page **pages); #if defined(CONFIG_NFS_V4_1) extern int nfs41_sequence_done(struct rpc_task *, struct nfs4_sequence_res *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0d123fe0a423..0fbd2925a828 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5541,7 +5541,7 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server) */ #define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE) -static int buf_to_pages_noslab(const void *buf, size_t buflen, +int nfs4_buf_to_pages_noslab(const void *buf, size_t buflen, struct page **pages) { struct page *newpage, **spages; @@ -5783,7 +5783,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl return -EOPNOTSUPP; if (npages > ARRAY_SIZE(pages)) return -ERANGE; - i = buf_to_pages_noslab(buf, buflen, arg.acl_pages); + i = nfs4_buf_to_pages_noslab(buf, buflen, arg.acl_pages); if (i < 0) return i; nfs4_inode_make_writeable(inode); From c10a75145febb588d96c6adda6eb5ae2338e4e32 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:39:02 +0000 Subject: [PATCH 11/13] NFSv4.2: add the extended attribute proc functions. Implement the extended attribute procedures for NFSv4.2 extended attribute support (RFC 8276). Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- fs/nfs/nfs42.h | 8 ++ fs/nfs/nfs42proc.c | 236 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 244 insertions(+) diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index 51de8ddc7d88..0fe5aacbcfdf 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -39,6 +39,14 @@ static inline bool nfs42_files_from_same_server(struct file *in, c_out->cl_serverowner); } +ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name, + void *buf, size_t buflen); +int nfs42_proc_setxattr(struct inode *inode, const char *name, + const void *buf, size_t buflen, int flags); +ssize_t nfs42_proc_listxattrs(struct inode *inode, void *buf, + size_t buflen, u64 *cookiep, bool *eofp); +int nfs42_proc_removexattr(struct inode *inode, const char *name); + /* * Maximum XDR buffer size needed for a listxattr buffer of buflen size. * diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index e2ae54b35dfe..8c2e52bc986a 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -1088,3 +1088,239 @@ int nfs42_proc_clone(struct file *src_f, struct file *dst_f, nfs_put_lock_context(src_lock); return err; } + +#define NFS4XATTR_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE) + +static int _nfs42_proc_removexattr(struct inode *inode, const char *name) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs42_removexattrargs args = { + .fh = NFS_FH(inode), + .xattr_name = name, + }; + struct nfs42_removexattrres res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVEXATTR], + .rpc_argp = &args, + .rpc_resp = &res, + }; + int ret; + unsigned long timestamp = jiffies; + + ret = nfs4_call_sync(server->client, server, &msg, &args.seq_args, + &res.seq_res, 1); + if (!ret) + nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0); + + return ret; +} + +static int _nfs42_proc_setxattr(struct inode *inode, const char *name, + const void *buf, size_t buflen, int flags) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct page *pages[NFS4XATTR_MAXPAGES]; + struct nfs42_setxattrargs arg = { + .fh = NFS_FH(inode), + .xattr_pages = pages, + .xattr_len = buflen, + .xattr_name = name, + .xattr_flags = flags, + }; + struct nfs42_setxattrres res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETXATTR], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + int ret, np; + unsigned long timestamp = jiffies; + + if (buflen > server->sxasize) + return -ERANGE; + + if (buflen > 0) { + np = nfs4_buf_to_pages_noslab(buf, buflen, arg.xattr_pages); + if (np < 0) + return np; + } else + np = 0; + + ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, + &res.seq_res, 1); + + for (; np > 0; np--) + put_page(pages[np - 1]); + + if (!ret) + nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0); + + return ret; +} + +static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name, + void *buf, size_t buflen) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct page *pages[NFS4XATTR_MAXPAGES] = {}; + struct nfs42_getxattrargs arg = { + .fh = NFS_FH(inode), + .xattr_pages = pages, + .xattr_len = buflen, + .xattr_name = name, + }; + struct nfs42_getxattrres res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETXATTR], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + int ret, np; + + ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, + &res.seq_res, 0); + if (ret < 0) + return ret; + + if (buflen) { + if (res.xattr_len > buflen) + return -ERANGE; + _copy_from_pages(buf, pages, 0, res.xattr_len); + } + + np = DIV_ROUND_UP(res.xattr_len, PAGE_SIZE); + while (--np >= 0) + __free_page(pages[np]); + + return res.xattr_len; +} + +static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf, + size_t buflen, u64 *cookiep, bool *eofp) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct page **pages; + struct nfs42_listxattrsargs arg = { + .fh = NFS_FH(inode), + .cookie = *cookiep, + }; + struct nfs42_listxattrsres res = { + .eof = false, + .xattr_buf = buf, + .xattr_len = buflen, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LISTXATTRS], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + u32 xdrlen; + int ret, np; + + + res.scratch = alloc_page(GFP_KERNEL); + if (!res.scratch) + return -ENOMEM; + + xdrlen = nfs42_listxattr_xdrsize(buflen); + if (xdrlen > server->lxasize) + xdrlen = server->lxasize; + np = xdrlen / PAGE_SIZE + 1; + + pages = kcalloc(np, sizeof(struct page *), GFP_KERNEL); + if (pages == NULL) { + __free_page(res.scratch); + return -ENOMEM; + } + + arg.xattr_pages = pages; + arg.count = xdrlen; + + ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, + &res.seq_res, 0); + + if (ret >= 0) { + ret = res.copied; + *cookiep = res.cookie; + *eofp = res.eof; + } + + while (--np >= 0) { + if (pages[np]) + __free_page(pages[np]); + } + + __free_page(res.scratch); + kfree(pages); + + return ret; + +} + +ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name, + void *buf, size_t buflen) +{ + struct nfs4_exception exception = { }; + ssize_t err; + + do { + err = _nfs42_proc_getxattr(inode, name, buf, buflen); + if (err >= 0) + break; + err = nfs4_handle_exception(NFS_SERVER(inode), err, + &exception); + } while (exception.retry); + + return err; +} + +int nfs42_proc_setxattr(struct inode *inode, const char *name, + const void *buf, size_t buflen, int flags) +{ + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs42_proc_setxattr(inode, name, buf, buflen, flags); + if (!err) + break; + err = nfs4_handle_exception(NFS_SERVER(inode), err, + &exception); + } while (exception.retry); + + return err; +} + +ssize_t nfs42_proc_listxattrs(struct inode *inode, void *buf, + size_t buflen, u64 *cookiep, bool *eofp) +{ + struct nfs4_exception exception = { }; + ssize_t err; + + do { + err = _nfs42_proc_listxattrs(inode, buf, buflen, + cookiep, eofp); + if (err >= 0) + break; + err = nfs4_handle_exception(NFS_SERVER(inode), err, + &exception); + } while (exception.retry); + + return err; +} + +int nfs42_proc_removexattr(struct inode *inode, const char *name) +{ + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs42_proc_removexattr(inode, name); + if (!err) + break; + err = nfs4_handle_exception(NFS_SERVER(inode), err, + &exception); + } while (exception.retry); + + return err; +} From 012a211abd5db098094ce429de5f046368391e68 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:39:03 +0000 Subject: [PATCH 12/13] NFSv4.2: hook in the user extended attribute handlers Now that all the lower level code is there to make the RPC calls, hook it in to the xattr handlers and the listxattr entry point, to make them available. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 123 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 121 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0fbd2925a828..92a07956f07b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -66,6 +66,7 @@ #include "nfs4idmap.h" #include "nfs4session.h" #include "fscache.h" +#include "nfs42.h" #include "nfs4trace.h" @@ -7440,6 +7441,103 @@ nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len) #endif +#ifdef CONFIG_NFS_V4_2 +static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *key, const void *buf, + size_t buflen, int flags) +{ + struct nfs_access_entry cache; + + if (!nfs_server_capable(inode, NFS_CAP_XATTR)) + return -EOPNOTSUPP; + + /* + * There is no mapping from the MAY_* flags to the NFS_ACCESS_XA* + * flags right now. Handling of xattr operations use the normal + * file read/write permissions. + * + * Just in case the server has other ideas (which RFC 8276 allows), + * do a cached access check for the XA* flags to possibly avoid + * doing an RPC and getting EACCES back. + */ + if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) { + if (!(cache.mask & NFS_ACCESS_XAWRITE)) + return -EACCES; + } + + if (buf == NULL) + return nfs42_proc_removexattr(inode, key); + else + return nfs42_proc_setxattr(inode, key, buf, buflen, flags); +} + +static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *key, void *buf, size_t buflen) +{ + struct nfs_access_entry cache; + + if (!nfs_server_capable(inode, NFS_CAP_XATTR)) + return -EOPNOTSUPP; + + if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) { + if (!(cache.mask & NFS_ACCESS_XAREAD)) + return -EACCES; + } + + return nfs42_proc_getxattr(inode, key, buf, buflen); +} + +static ssize_t +nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len) +{ + u64 cookie; + bool eof; + int ret, size; + char *buf; + size_t buflen; + struct nfs_access_entry cache; + + if (!nfs_server_capable(inode, NFS_CAP_XATTR)) + return 0; + + if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) { + if (!(cache.mask & NFS_ACCESS_XALIST)) + return 0; + } + + cookie = 0; + eof = false; + buflen = list_len ? list_len : XATTR_LIST_MAX; + buf = list_len ? list : NULL; + size = 0; + + while (!eof) { + ret = nfs42_proc_listxattrs(inode, buf, buflen, + &cookie, &eof); + if (ret < 0) + return ret; + + if (list_len) { + buf += ret; + buflen -= ret; + } + size += ret; + } + + return size; +} + +#else + +static ssize_t +nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len) +{ + return 0; +} +#endif /* CONFIG_NFS_V4_2 */ + /* * nfs_fhget will use either the mounted_on_fileid or the fileid */ @@ -10045,7 +10143,7 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = { static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size) { - ssize_t error, error2; + ssize_t error, error2, error3; error = generic_listxattr(dentry, list, size); if (error < 0) @@ -10058,7 +10156,17 @@ static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size) error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, size); if (error2 < 0) return error2; - return error + error2; + + if (list) { + list += error2; + size -= error2; + } + + error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, size); + if (error3 < 0) + return error3; + + return error + error2 + error3; } static const struct inode_operations nfs4_dir_inode_operations = { @@ -10146,10 +10254,21 @@ static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { .set = nfs4_xattr_set_nfs4_acl, }; +#ifdef CONFIG_NFS_V4_2 +static const struct xattr_handler nfs4_xattr_nfs4_user_handler = { + .prefix = XATTR_USER_PREFIX, + .get = nfs4_xattr_get_nfs4_user, + .set = nfs4_xattr_set_nfs4_user, +}; +#endif + const struct xattr_handler *nfs4_xattr_handlers[] = { &nfs4_xattr_nfs4_acl_handler, #ifdef CONFIG_NFS_V4_SECURITY_LABEL &nfs4_xattr_nfs4_label_handler, +#endif +#ifdef CONFIG_NFS_V4_2 + &nfs4_xattr_nfs4_user_handler, #endif NULL }; From 95ad37f90c338e3fd4abf61cecfe02b6f3e080f0 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:39:04 +0000 Subject: [PATCH 13/13] NFSv4.2: add client side xattr caching. Implement client side caching for NFSv4.2 extended attributes. The cache is a per-inode hashtable, with name/value entries. There is one special entry for the listxattr cache. NFS inodes have a pointer to a cache structure. The cache structure is allocated on demand, freed when the cache is invalidated. Memory shrinkers keep the size in check. Large entries (> PAGE_SIZE) are collected by a separate shrinker, and freed more aggressively than others. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 2 +- fs/nfs/inode.c | 9 +- fs/nfs/nfs42proc.c | 12 + fs/nfs/nfs42xattr.c | 1083 +++++++++++++++++++++++++++++++++++ fs/nfs/nfs4_fs.h | 22 + fs/nfs/nfs4proc.c | 42 +- fs/nfs/nfs4super.c | 10 + include/linux/nfs_fs.h | 6 + include/uapi/linux/nfs_fs.h | 1 + 9 files changed, 1179 insertions(+), 8 deletions(-) create mode 100644 fs/nfs/nfs42xattr.c diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 2433c3e03cfa..22d11fdc6deb 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -30,7 +30,7 @@ nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o pnfs_nfs.o -nfsv4-$(CONFIG_NFS_V4_2) += nfs42proc.o +nfsv4-$(CONFIG_NFS_V4_2) += nfs42proc.o nfs42xattr.o obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/ obj-$(CONFIG_PNFS_BLOCK) += blocklayout/ diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 629af798dfc9..10048eed485d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -193,6 +193,7 @@ bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags) return nfs_check_cache_invalid_not_delegated(inode, flags); } +EXPORT_SYMBOL_GPL(nfs_check_cache_invalid); static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) { @@ -234,11 +235,13 @@ static void nfs_zap_caches_locked(struct inode *inode) | NFS_INO_INVALID_DATA | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_XATTR | NFS_INO_REVAL_PAGECACHE); } else nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_XATTR | NFS_INO_REVAL_PAGECACHE); nfs_zap_label_cache_locked(nfsi); } @@ -1897,7 +1900,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (!(have_writers || have_delegation)) { invalid |= NFS_INO_INVALID_DATA | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL; + | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_XATTR; /* Force revalidate of all attributes */ save_cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME @@ -2100,6 +2104,9 @@ struct inode *nfs_alloc_inode(struct super_block *sb) #if IS_ENABLED(CONFIG_NFS_V4) nfsi->nfs4_acl = NULL; #endif /* CONFIG_NFS_V4 */ +#ifdef CONFIG_NFS_V4_2 + nfsi->xattr_cache = NULL; +#endif return &nfsi->vfs_inode; } EXPORT_SYMBOL_GPL(nfs_alloc_inode); diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 8c2e52bc986a..e200522469af 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -1182,6 +1182,18 @@ static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name, if (ret < 0) return ret; + /* + * Normally, the caching is done one layer up, but for successful + * RPCS, always cache the result here, even if the caller was + * just querying the length, or if the reply was too big for + * the caller. This avoids a second RPC in the case of the + * common query-alloc-retrieve cycle for xattrs. + * + * Note that xattr_len is always capped to XATTR_SIZE_MAX. + */ + + nfs4_xattr_cache_add(inode, name, NULL, pages, res.xattr_len); + if (buflen) { if (res.xattr_len > buflen) return -ERANGE; diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c new file mode 100644 index 000000000000..23fdab977a2a --- /dev/null +++ b/fs/nfs/nfs42xattr.c @@ -0,0 +1,1083 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2019, 2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * + * User extended attribute client side cache functions. + * + * Author: Frank van der Linden + */ +#include +#include +#include +#include +#include + +#include "nfs4_fs.h" +#include "internal.h" + +/* + * User extended attributes client side caching is implemented by having + * a cache structure attached to NFS inodes. This structure is allocated + * when needed, and freed when the cache is zapped. + * + * The cache structure contains as hash table of entries, and a pointer + * to a special-cased entry for the listxattr cache. + * + * Accessing and allocating / freeing the caches is done via reference + * counting. The cache entries use a similar refcounting scheme. + * + * This makes freeing a cache, both from the shrinker and from the + * zap cache path, easy. It also means that, in current use cases, + * the large majority of inodes will not waste any memory, as they + * will never have any user extended attributes assigned to them. + * + * Attribute entries are hashed in to a simple hash table. They are + * also part of an LRU. + * + * There are three shrinkers. + * + * Two shrinkers deal with the cache entries themselves: one for + * large entries (> PAGE_SIZE), and one for smaller entries. The + * shrinker for the larger entries works more aggressively than + * those for the smaller entries. + * + * The other shrinker frees the cache structures themselves. + */ + +/* + * 64 buckets is a good default. There is likely no reasonable + * workload that uses more than even 64 user extended attributes. + * You can certainly add a lot more - but you get what you ask for + * in those circumstances. + */ +#define NFS4_XATTR_HASH_SIZE 64 + +#define NFSDBG_FACILITY NFSDBG_XATTRCACHE + +struct nfs4_xattr_cache; +struct nfs4_xattr_entry; + +struct nfs4_xattr_bucket { + spinlock_t lock; + struct hlist_head hlist; + struct nfs4_xattr_cache *cache; + bool draining; +}; + +struct nfs4_xattr_cache { + struct kref ref; + spinlock_t hash_lock; /* protects hashtable and lru */ + struct nfs4_xattr_bucket buckets[NFS4_XATTR_HASH_SIZE]; + struct list_head lru; + struct list_head dispose; + atomic_long_t nent; + spinlock_t listxattr_lock; + struct inode *inode; + struct nfs4_xattr_entry *listxattr; + struct work_struct work; +}; + +struct nfs4_xattr_entry { + struct kref ref; + struct hlist_node hnode; + struct list_head lru; + struct list_head dispose; + char *xattr_name; + void *xattr_value; + size_t xattr_size; + struct nfs4_xattr_bucket *bucket; + uint32_t flags; +}; + +#define NFS4_XATTR_ENTRY_EXTVAL 0x0001 + +/* + * LRU list of NFS inodes that have xattr caches. + */ +static struct list_lru nfs4_xattr_cache_lru; +static struct list_lru nfs4_xattr_entry_lru; +static struct list_lru nfs4_xattr_large_entry_lru; + +static struct kmem_cache *nfs4_xattr_cache_cachep; + +static struct workqueue_struct *nfs4_xattr_cache_wq; + +/* + * Hashing helper functions. + */ +static void +nfs4_xattr_hash_init(struct nfs4_xattr_cache *cache) +{ + unsigned int i; + + for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) { + INIT_HLIST_HEAD(&cache->buckets[i].hlist); + spin_lock_init(&cache->buckets[i].lock); + cache->buckets[i].cache = cache; + cache->buckets[i].draining = false; + } +} + +/* + * Locking order: + * 1. inode i_lock or bucket lock + * 2. list_lru lock (taken by list_lru_* functions) + */ + +/* + * Wrapper functions to add a cache entry to the right LRU. + */ +static bool +nfs4_xattr_entry_lru_add(struct nfs4_xattr_entry *entry) +{ + struct list_lru *lru; + + lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ? + &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru; + + return list_lru_add(lru, &entry->lru); +} + +static bool +nfs4_xattr_entry_lru_del(struct nfs4_xattr_entry *entry) +{ + struct list_lru *lru; + + lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ? + &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru; + + return list_lru_del(lru, &entry->lru); +} + +/* + * This function allocates cache entries. They are the normal + * extended attribute name/value pairs, but may also be a listxattr + * cache. Those allocations use the same entry so that they can be + * treated as one by the memory shrinker. + * + * xattr cache entries are allocated together with names. If the + * value fits in to one page with the entry structure and the name, + * it will also be part of the same allocation (kmalloc). This is + * expected to be the vast majority of cases. Larger allocations + * have a value pointer that is allocated separately by kvmalloc. + * + * Parameters: + * + * @name: Name of the extended attribute. NULL for listxattr cache + * entry. + * @value: Value of attribute, or listxattr cache. NULL if the + * value is to be copied from pages instead. + * @pages: Pages to copy the value from, if not NULL. Passed in to + * make it easier to copy the value after an RPC, even if + * the value will not be passed up to application (e.g. + * for a 'query' getxattr with NULL buffer). + * @len: Length of the value. Can be 0 for zero-length attribues. + * @value and @pages will be NULL if @len is 0. + */ +static struct nfs4_xattr_entry * +nfs4_xattr_alloc_entry(const char *name, const void *value, + struct page **pages, size_t len) +{ + struct nfs4_xattr_entry *entry; + void *valp; + char *namep; + size_t alloclen, slen; + char *buf; + uint32_t flags; + + BUILD_BUG_ON(sizeof(struct nfs4_xattr_entry) + + XATTR_NAME_MAX + 1 > PAGE_SIZE); + + alloclen = sizeof(struct nfs4_xattr_entry); + if (name != NULL) { + slen = strlen(name) + 1; + alloclen += slen; + } else + slen = 0; + + if (alloclen + len <= PAGE_SIZE) { + alloclen += len; + flags = 0; + } else { + flags = NFS4_XATTR_ENTRY_EXTVAL; + } + + buf = kmalloc(alloclen, GFP_KERNEL_ACCOUNT | GFP_NOFS); + if (buf == NULL) + return NULL; + entry = (struct nfs4_xattr_entry *)buf; + + if (name != NULL) { + namep = buf + sizeof(struct nfs4_xattr_entry); + memcpy(namep, name, slen); + } else { + namep = NULL; + } + + + if (flags & NFS4_XATTR_ENTRY_EXTVAL) { + valp = kvmalloc(len, GFP_KERNEL_ACCOUNT | GFP_NOFS); + if (valp == NULL) { + kfree(buf); + return NULL; + } + } else if (len != 0) { + valp = buf + sizeof(struct nfs4_xattr_entry) + slen; + } else + valp = NULL; + + if (valp != NULL) { + if (value != NULL) + memcpy(valp, value, len); + else + _copy_from_pages(valp, pages, 0, len); + } + + entry->flags = flags; + entry->xattr_value = valp; + kref_init(&entry->ref); + entry->xattr_name = namep; + entry->xattr_size = len; + entry->bucket = NULL; + INIT_LIST_HEAD(&entry->lru); + INIT_LIST_HEAD(&entry->dispose); + INIT_HLIST_NODE(&entry->hnode); + + return entry; +} + +static void +nfs4_xattr_free_entry(struct nfs4_xattr_entry *entry) +{ + if (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) + kvfree(entry->xattr_value); + kfree(entry); +} + +static void +nfs4_xattr_free_entry_cb(struct kref *kref) +{ + struct nfs4_xattr_entry *entry; + + entry = container_of(kref, struct nfs4_xattr_entry, ref); + + if (WARN_ON(!list_empty(&entry->lru))) + return; + + nfs4_xattr_free_entry(entry); +} + +static void +nfs4_xattr_free_cache_cb(struct kref *kref) +{ + struct nfs4_xattr_cache *cache; + int i; + + cache = container_of(kref, struct nfs4_xattr_cache, ref); + + for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) { + if (WARN_ON(!hlist_empty(&cache->buckets[i].hlist))) + return; + cache->buckets[i].draining = false; + } + + cache->listxattr = NULL; + + kmem_cache_free(nfs4_xattr_cache_cachep, cache); + +} + +static struct nfs4_xattr_cache * +nfs4_xattr_alloc_cache(void) +{ + struct nfs4_xattr_cache *cache; + + cache = kmem_cache_alloc(nfs4_xattr_cache_cachep, + GFP_KERNEL_ACCOUNT | GFP_NOFS); + if (cache == NULL) + return NULL; + + kref_init(&cache->ref); + atomic_long_set(&cache->nent, 0); + + return cache; +} + +/* + * Set the listxattr cache, which is a special-cased cache entry. + * The special value ERR_PTR(-ESTALE) is used to indicate that + * the cache is being drained - this prevents a new listxattr + * cache from being added to what is now a stale cache. + */ +static int +nfs4_xattr_set_listcache(struct nfs4_xattr_cache *cache, + struct nfs4_xattr_entry *new) +{ + struct nfs4_xattr_entry *old; + int ret = 1; + + spin_lock(&cache->listxattr_lock); + + old = cache->listxattr; + + if (old == ERR_PTR(-ESTALE)) { + ret = 0; + goto out; + } + + cache->listxattr = new; + if (new != NULL && new != ERR_PTR(-ESTALE)) + nfs4_xattr_entry_lru_add(new); + + if (old != NULL) { + nfs4_xattr_entry_lru_del(old); + kref_put(&old->ref, nfs4_xattr_free_entry_cb); + } +out: + spin_unlock(&cache->listxattr_lock); + + return ret; +} + +/* + * Unlink a cache from its parent inode, clearing out an invalid + * cache. Must be called with i_lock held. + */ +static struct nfs4_xattr_cache * +nfs4_xattr_cache_unlink(struct inode *inode) +{ + struct nfs_inode *nfsi; + struct nfs4_xattr_cache *oldcache; + + nfsi = NFS_I(inode); + + oldcache = nfsi->xattr_cache; + if (oldcache != NULL) { + list_lru_del(&nfs4_xattr_cache_lru, &oldcache->lru); + oldcache->inode = NULL; + } + nfsi->xattr_cache = NULL; + nfsi->cache_validity &= ~NFS_INO_INVALID_XATTR; + + return oldcache; + +} + +/* + * Discard a cache. Usually called by a worker, since walking all + * the entries can take up some cycles that we don't want to waste + * in the I/O path. Can also be called from the shrinker callback. + * + * The cache is dead, it has already been unlinked from its inode, + * and no longer appears on the cache LRU list. + * + * Mark all buckets as draining, so that no new entries are added. This + * could still happen in the unlikely, but possible case that another + * thread had grabbed a reference before it was unlinked from the inode, + * and is still holding it for an add operation. + * + * Remove all entries from the LRU lists, so that there is no longer + * any way to 'find' this cache. Then, remove the entries from the hash + * table. + * + * At that point, the cache will remain empty and can be freed when the final + * reference drops, which is very likely the kref_put at the end of + * this function, or the one called immediately afterwards in the + * shrinker callback. + */ +static void +nfs4_xattr_discard_cache(struct nfs4_xattr_cache *cache) +{ + unsigned int i; + struct nfs4_xattr_entry *entry; + struct nfs4_xattr_bucket *bucket; + struct hlist_node *n; + + nfs4_xattr_set_listcache(cache, ERR_PTR(-ESTALE)); + + for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) { + bucket = &cache->buckets[i]; + + spin_lock(&bucket->lock); + bucket->draining = true; + hlist_for_each_entry_safe(entry, n, &bucket->hlist, hnode) { + nfs4_xattr_entry_lru_del(entry); + hlist_del_init(&entry->hnode); + kref_put(&entry->ref, nfs4_xattr_free_entry_cb); + } + spin_unlock(&bucket->lock); + } + + atomic_long_set(&cache->nent, 0); + + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); +} + +static void +nfs4_xattr_discard_cache_worker(struct work_struct *work) +{ + struct nfs4_xattr_cache *cache = container_of(work, + struct nfs4_xattr_cache, work); + + nfs4_xattr_discard_cache(cache); +} + +static void +nfs4_xattr_reap_cache(struct nfs4_xattr_cache *cache) +{ + queue_work(nfs4_xattr_cache_wq, &cache->work); +} + +/* + * Get a referenced copy of the cache structure. Avoid doing allocs + * while holding i_lock. Which means that we do some optimistic allocation, + * and might have to free the result in rare cases. + * + * This function only checks the NFS_INO_INVALID_XATTR cache validity bit + * and acts accordingly, replacing the cache when needed. For the read case + * (!add), this means that the caller must make sure that the cache + * is valid before caling this function. getxattr and listxattr call + * revalidate_inode to do this. The attribute cache timeout (for the + * non-delegated case) is expected to be dealt with in the revalidate + * call. + */ + +static struct nfs4_xattr_cache * +nfs4_xattr_get_cache(struct inode *inode, int add) +{ + struct nfs_inode *nfsi; + struct nfs4_xattr_cache *cache, *oldcache, *newcache; + + nfsi = NFS_I(inode); + + cache = oldcache = NULL; + + spin_lock(&inode->i_lock); + + if (nfsi->cache_validity & NFS_INO_INVALID_XATTR) + oldcache = nfs4_xattr_cache_unlink(inode); + else + cache = nfsi->xattr_cache; + + if (cache != NULL) + kref_get(&cache->ref); + + spin_unlock(&inode->i_lock); + + if (add && cache == NULL) { + newcache = NULL; + + cache = nfs4_xattr_alloc_cache(); + if (cache == NULL) + goto out; + + spin_lock(&inode->i_lock); + if (nfsi->cache_validity & NFS_INO_INVALID_XATTR) { + /* + * The cache was invalidated again. Give up, + * since what we want to enter is now likely + * outdated anyway. + */ + spin_unlock(&inode->i_lock); + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); + cache = NULL; + goto out; + } + + /* + * Check if someone beat us to it. + */ + if (nfsi->xattr_cache != NULL) { + newcache = nfsi->xattr_cache; + kref_get(&newcache->ref); + } else { + kref_get(&cache->ref); + nfsi->xattr_cache = cache; + cache->inode = inode; + list_lru_add(&nfs4_xattr_cache_lru, &cache->lru); + } + + spin_unlock(&inode->i_lock); + + /* + * If there was a race, throw away the cache we just + * allocated, and use the new one allocated by someone + * else. + */ + if (newcache != NULL) { + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); + cache = newcache; + } + } + +out: + /* + * Discarding an old cache is done via a workqueue. + */ + if (oldcache != NULL) + nfs4_xattr_reap_cache(oldcache); + + return cache; +} + +static inline struct nfs4_xattr_bucket * +nfs4_xattr_hash_bucket(struct nfs4_xattr_cache *cache, const char *name) +{ + return &cache->buckets[jhash(name, strlen(name), 0) & + (ARRAY_SIZE(cache->buckets) - 1)]; +} + +static struct nfs4_xattr_entry * +nfs4_xattr_get_entry(struct nfs4_xattr_bucket *bucket, const char *name) +{ + struct nfs4_xattr_entry *entry; + + entry = NULL; + + hlist_for_each_entry(entry, &bucket->hlist, hnode) { + if (!strcmp(entry->xattr_name, name)) + break; + } + + return entry; +} + +static int +nfs4_xattr_hash_add(struct nfs4_xattr_cache *cache, + struct nfs4_xattr_entry *entry) +{ + struct nfs4_xattr_bucket *bucket; + struct nfs4_xattr_entry *oldentry = NULL; + int ret = 1; + + bucket = nfs4_xattr_hash_bucket(cache, entry->xattr_name); + entry->bucket = bucket; + + spin_lock(&bucket->lock); + + if (bucket->draining) { + ret = 0; + goto out; + } + + oldentry = nfs4_xattr_get_entry(bucket, entry->xattr_name); + if (oldentry != NULL) { + hlist_del_init(&oldentry->hnode); + nfs4_xattr_entry_lru_del(oldentry); + } else { + atomic_long_inc(&cache->nent); + } + + hlist_add_head(&entry->hnode, &bucket->hlist); + nfs4_xattr_entry_lru_add(entry); + +out: + spin_unlock(&bucket->lock); + + if (oldentry != NULL) + kref_put(&oldentry->ref, nfs4_xattr_free_entry_cb); + + return ret; +} + +static void +nfs4_xattr_hash_remove(struct nfs4_xattr_cache *cache, const char *name) +{ + struct nfs4_xattr_bucket *bucket; + struct nfs4_xattr_entry *entry; + + bucket = nfs4_xattr_hash_bucket(cache, name); + + spin_lock(&bucket->lock); + + entry = nfs4_xattr_get_entry(bucket, name); + if (entry != NULL) { + hlist_del_init(&entry->hnode); + nfs4_xattr_entry_lru_del(entry); + atomic_long_dec(&cache->nent); + } + + spin_unlock(&bucket->lock); + + if (entry != NULL) + kref_put(&entry->ref, nfs4_xattr_free_entry_cb); +} + +static struct nfs4_xattr_entry * +nfs4_xattr_hash_find(struct nfs4_xattr_cache *cache, const char *name) +{ + struct nfs4_xattr_bucket *bucket; + struct nfs4_xattr_entry *entry; + + bucket = nfs4_xattr_hash_bucket(cache, name); + + spin_lock(&bucket->lock); + + entry = nfs4_xattr_get_entry(bucket, name); + if (entry != NULL) + kref_get(&entry->ref); + + spin_unlock(&bucket->lock); + + return entry; +} + +/* + * Entry point to retrieve an entry from the cache. + */ +ssize_t nfs4_xattr_cache_get(struct inode *inode, const char *name, char *buf, + ssize_t buflen) +{ + struct nfs4_xattr_cache *cache; + struct nfs4_xattr_entry *entry; + ssize_t ret; + + cache = nfs4_xattr_get_cache(inode, 0); + if (cache == NULL) + return -ENOENT; + + ret = 0; + entry = nfs4_xattr_hash_find(cache, name); + + if (entry != NULL) { + dprintk("%s: cache hit '%s', len %lu\n", __func__, + entry->xattr_name, (unsigned long)entry->xattr_size); + if (buflen == 0) { + /* Length probe only */ + ret = entry->xattr_size; + } else if (buflen < entry->xattr_size) + ret = -ERANGE; + else { + memcpy(buf, entry->xattr_value, entry->xattr_size); + ret = entry->xattr_size; + } + kref_put(&entry->ref, nfs4_xattr_free_entry_cb); + } else { + dprintk("%s: cache miss '%s'\n", __func__, name); + ret = -ENOENT; + } + + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); + + return ret; +} + +/* + * Retrieve a cached list of xattrs from the cache. + */ +ssize_t nfs4_xattr_cache_list(struct inode *inode, char *buf, ssize_t buflen) +{ + struct nfs4_xattr_cache *cache; + struct nfs4_xattr_entry *entry; + ssize_t ret; + + cache = nfs4_xattr_get_cache(inode, 0); + if (cache == NULL) + return -ENOENT; + + spin_lock(&cache->listxattr_lock); + + entry = cache->listxattr; + + if (entry != NULL && entry != ERR_PTR(-ESTALE)) { + if (buflen == 0) { + /* Length probe only */ + ret = entry->xattr_size; + } else if (entry->xattr_size > buflen) + ret = -ERANGE; + else { + memcpy(buf, entry->xattr_value, entry->xattr_size); + ret = entry->xattr_size; + } + } else { + ret = -ENOENT; + } + + spin_unlock(&cache->listxattr_lock); + + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); + + return ret; +} + +/* + * Add an xattr to the cache. + * + * This also invalidates the xattr list cache. + */ +void nfs4_xattr_cache_add(struct inode *inode, const char *name, + const char *buf, struct page **pages, ssize_t buflen) +{ + struct nfs4_xattr_cache *cache; + struct nfs4_xattr_entry *entry; + + dprintk("%s: add '%s' len %lu\n", __func__, + name, (unsigned long)buflen); + + cache = nfs4_xattr_get_cache(inode, 1); + if (cache == NULL) + return; + + entry = nfs4_xattr_alloc_entry(name, buf, pages, buflen); + if (entry == NULL) + goto out; + + (void)nfs4_xattr_set_listcache(cache, NULL); + + if (!nfs4_xattr_hash_add(cache, entry)) + kref_put(&entry->ref, nfs4_xattr_free_entry_cb); + +out: + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); +} + + +/* + * Remove an xattr from the cache. + * + * This also invalidates the xattr list cache. + */ +void nfs4_xattr_cache_remove(struct inode *inode, const char *name) +{ + struct nfs4_xattr_cache *cache; + + dprintk("%s: remove '%s'\n", __func__, name); + + cache = nfs4_xattr_get_cache(inode, 0); + if (cache == NULL) + return; + + (void)nfs4_xattr_set_listcache(cache, NULL); + nfs4_xattr_hash_remove(cache, name); + + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); +} + +/* + * Cache listxattr output, replacing any possible old one. + */ +void nfs4_xattr_cache_set_list(struct inode *inode, const char *buf, + ssize_t buflen) +{ + struct nfs4_xattr_cache *cache; + struct nfs4_xattr_entry *entry; + + cache = nfs4_xattr_get_cache(inode, 1); + if (cache == NULL) + return; + + entry = nfs4_xattr_alloc_entry(NULL, buf, NULL, buflen); + if (entry == NULL) + goto out; + + /* + * This is just there to be able to get to bucket->cache, + * which is obviously the same for all buckets, so just + * use bucket 0. + */ + entry->bucket = &cache->buckets[0]; + + if (!nfs4_xattr_set_listcache(cache, entry)) + kref_put(&entry->ref, nfs4_xattr_free_entry_cb); + +out: + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); +} + +/* + * Zap the entire cache. Called when an inode is evicted. + */ +void nfs4_xattr_cache_zap(struct inode *inode) +{ + struct nfs4_xattr_cache *oldcache; + + spin_lock(&inode->i_lock); + oldcache = nfs4_xattr_cache_unlink(inode); + spin_unlock(&inode->i_lock); + + if (oldcache) + nfs4_xattr_discard_cache(oldcache); +} + +/* + * The entry LRU is shrunk more aggressively than the cache LRU, + * by settings @seeks to 1. + * + * Cache structures are freed only when they've become empty, after + * pruning all but one entry. + */ + +static unsigned long nfs4_xattr_cache_count(struct shrinker *shrink, + struct shrink_control *sc); +static unsigned long nfs4_xattr_entry_count(struct shrinker *shrink, + struct shrink_control *sc); +static unsigned long nfs4_xattr_cache_scan(struct shrinker *shrink, + struct shrink_control *sc); +static unsigned long nfs4_xattr_entry_scan(struct shrinker *shrink, + struct shrink_control *sc); + +static struct shrinker nfs4_xattr_cache_shrinker = { + .count_objects = nfs4_xattr_cache_count, + .scan_objects = nfs4_xattr_cache_scan, + .seeks = DEFAULT_SEEKS, + .flags = SHRINKER_MEMCG_AWARE, +}; + +static struct shrinker nfs4_xattr_entry_shrinker = { + .count_objects = nfs4_xattr_entry_count, + .scan_objects = nfs4_xattr_entry_scan, + .seeks = DEFAULT_SEEKS, + .batch = 512, + .flags = SHRINKER_MEMCG_AWARE, +}; + +static struct shrinker nfs4_xattr_large_entry_shrinker = { + .count_objects = nfs4_xattr_entry_count, + .scan_objects = nfs4_xattr_entry_scan, + .seeks = 1, + .batch = 512, + .flags = SHRINKER_MEMCG_AWARE, +}; + +static enum lru_status +cache_lru_isolate(struct list_head *item, + struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) +{ + struct list_head *dispose = arg; + struct inode *inode; + struct nfs4_xattr_cache *cache = container_of(item, + struct nfs4_xattr_cache, lru); + + if (atomic_long_read(&cache->nent) > 1) + return LRU_SKIP; + + /* + * If a cache structure is on the LRU list, we know that + * its inode is valid. Try to lock it to break the link. + * Since we're inverting the lock order here, only try. + */ + inode = cache->inode; + + if (!spin_trylock(&inode->i_lock)) + return LRU_SKIP; + + kref_get(&cache->ref); + + cache->inode = NULL; + NFS_I(inode)->xattr_cache = NULL; + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_XATTR; + list_lru_isolate(lru, &cache->lru); + + spin_unlock(&inode->i_lock); + + list_add_tail(&cache->dispose, dispose); + return LRU_REMOVED; +} + +static unsigned long +nfs4_xattr_cache_scan(struct shrinker *shrink, struct shrink_control *sc) +{ + LIST_HEAD(dispose); + unsigned long freed; + struct nfs4_xattr_cache *cache; + + freed = list_lru_shrink_walk(&nfs4_xattr_cache_lru, sc, + cache_lru_isolate, &dispose); + while (!list_empty(&dispose)) { + cache = list_first_entry(&dispose, struct nfs4_xattr_cache, + dispose); + list_del_init(&cache->dispose); + nfs4_xattr_discard_cache(cache); + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); + } + + return freed; +} + + +static unsigned long +nfs4_xattr_cache_count(struct shrinker *shrink, struct shrink_control *sc) +{ + unsigned long count; + + count = list_lru_count(&nfs4_xattr_cache_lru); + return vfs_pressure_ratio(count); +} + +static enum lru_status +entry_lru_isolate(struct list_head *item, + struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) +{ + struct list_head *dispose = arg; + struct nfs4_xattr_bucket *bucket; + struct nfs4_xattr_cache *cache; + struct nfs4_xattr_entry *entry = container_of(item, + struct nfs4_xattr_entry, lru); + + bucket = entry->bucket; + cache = bucket->cache; + + /* + * Unhook the entry from its parent (either a cache bucket + * or a cache structure if it's a listxattr buf), so that + * it's no longer found. Then add it to the isolate list, + * to be freed later. + * + * In both cases, we're reverting lock order, so use + * trylock and skip the entry if we can't get the lock. + */ + if (entry->xattr_name != NULL) { + /* Regular cache entry */ + if (!spin_trylock(&bucket->lock)) + return LRU_SKIP; + + kref_get(&entry->ref); + + hlist_del_init(&entry->hnode); + atomic_long_dec(&cache->nent); + list_lru_isolate(lru, &entry->lru); + + spin_unlock(&bucket->lock); + } else { + /* Listxattr cache entry */ + if (!spin_trylock(&cache->listxattr_lock)) + return LRU_SKIP; + + kref_get(&entry->ref); + + cache->listxattr = NULL; + list_lru_isolate(lru, &entry->lru); + + spin_unlock(&cache->listxattr_lock); + } + + list_add_tail(&entry->dispose, dispose); + return LRU_REMOVED; +} + +static unsigned long +nfs4_xattr_entry_scan(struct shrinker *shrink, struct shrink_control *sc) +{ + LIST_HEAD(dispose); + unsigned long freed; + struct nfs4_xattr_entry *entry; + struct list_lru *lru; + + lru = (shrink == &nfs4_xattr_large_entry_shrinker) ? + &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru; + + freed = list_lru_shrink_walk(lru, sc, entry_lru_isolate, &dispose); + + while (!list_empty(&dispose)) { + entry = list_first_entry(&dispose, struct nfs4_xattr_entry, + dispose); + list_del_init(&entry->dispose); + + /* + * Drop two references: the one that we just grabbed + * in entry_lru_isolate, and the one that was set + * when the entry was first allocated. + */ + kref_put(&entry->ref, nfs4_xattr_free_entry_cb); + kref_put(&entry->ref, nfs4_xattr_free_entry_cb); + } + + return freed; +} + +static unsigned long +nfs4_xattr_entry_count(struct shrinker *shrink, struct shrink_control *sc) +{ + unsigned long count; + struct list_lru *lru; + + lru = (shrink == &nfs4_xattr_large_entry_shrinker) ? + &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru; + + count = list_lru_count(lru); + return vfs_pressure_ratio(count); +} + + +static void nfs4_xattr_cache_init_once(void *p) +{ + struct nfs4_xattr_cache *cache = (struct nfs4_xattr_cache *)p; + + spin_lock_init(&cache->listxattr_lock); + atomic_long_set(&cache->nent, 0); + nfs4_xattr_hash_init(cache); + cache->listxattr = NULL; + INIT_WORK(&cache->work, nfs4_xattr_discard_cache_worker); + INIT_LIST_HEAD(&cache->lru); + INIT_LIST_HEAD(&cache->dispose); +} + +int __init nfs4_xattr_cache_init(void) +{ + int ret = 0; + + nfs4_xattr_cache_cachep = kmem_cache_create("nfs4_xattr_cache_cache", + sizeof(struct nfs4_xattr_cache), 0, + (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT), + nfs4_xattr_cache_init_once); + if (nfs4_xattr_cache_cachep == NULL) + return -ENOMEM; + + ret = list_lru_init_memcg(&nfs4_xattr_large_entry_lru, + &nfs4_xattr_large_entry_shrinker); + if (ret) + goto out4; + + ret = list_lru_init_memcg(&nfs4_xattr_entry_lru, + &nfs4_xattr_entry_shrinker); + if (ret) + goto out3; + + ret = list_lru_init_memcg(&nfs4_xattr_cache_lru, + &nfs4_xattr_cache_shrinker); + if (ret) + goto out2; + + nfs4_xattr_cache_wq = alloc_workqueue("nfs4_xattr", WQ_MEM_RECLAIM, 0); + if (nfs4_xattr_cache_wq == NULL) + goto out1; + + ret = register_shrinker(&nfs4_xattr_cache_shrinker); + if (ret) + goto out0; + + ret = register_shrinker(&nfs4_xattr_entry_shrinker); + if (ret) + goto out; + + ret = register_shrinker(&nfs4_xattr_large_entry_shrinker); + if (!ret) + return 0; + + unregister_shrinker(&nfs4_xattr_entry_shrinker); +out: + unregister_shrinker(&nfs4_xattr_cache_shrinker); +out0: + destroy_workqueue(nfs4_xattr_cache_wq); +out1: + list_lru_destroy(&nfs4_xattr_cache_lru); +out2: + list_lru_destroy(&nfs4_xattr_entry_lru); +out3: + list_lru_destroy(&nfs4_xattr_large_entry_lru); +out4: + kmem_cache_destroy(nfs4_xattr_cache_cachep); + + return ret; +} + +void nfs4_xattr_cache_exit(void) +{ + unregister_shrinker(&nfs4_xattr_entry_shrinker); + unregister_shrinker(&nfs4_xattr_cache_shrinker); + list_lru_destroy(&nfs4_xattr_entry_lru); + list_lru_destroy(&nfs4_xattr_cache_lru); + kmem_cache_destroy(nfs4_xattr_cache_cachep); + destroy_workqueue(nfs4_xattr_cache_wq); +} diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 2fa9e4ea98d2..7e16a586f3fc 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -626,12 +626,34 @@ static inline bool nfs4_state_match_open_stateid_other(const struct nfs4_state * nfs4_stateid_match_other(&state->open_stateid, stateid); } +/* nfs42xattr.c */ +#ifdef CONFIG_NFS_V4_2 +extern int __init nfs4_xattr_cache_init(void); +extern void nfs4_xattr_cache_exit(void); +extern void nfs4_xattr_cache_add(struct inode *inode, const char *name, + const char *buf, struct page **pages, + ssize_t buflen); +extern void nfs4_xattr_cache_remove(struct inode *inode, const char *name); +extern ssize_t nfs4_xattr_cache_get(struct inode *inode, const char *name, + char *buf, ssize_t buflen); +extern void nfs4_xattr_cache_set_list(struct inode *inode, const char *buf, + ssize_t buflen); +extern ssize_t nfs4_xattr_cache_list(struct inode *inode, char *buf, + ssize_t buflen); +extern void nfs4_xattr_cache_zap(struct inode *inode); #else +static inline void nfs4_xattr_cache_zap(struct inode *inode) +{ +} +#endif /* CONFIG_NFS_V4_2 */ + +#else /* CONFIG_NFS_V4 */ #define nfs4_close_state(a, b) do { } while (0) #define nfs4_close_sync(a, b) do { } while (0) #define nfs4_state_protect(a, b, c, d) do { } while (0) #define nfs4_state_protect_write(a, b, c, d) do { } while (0) + #endif /* CONFIG_NFS_V4 */ #endif /* __LINUX_FS_NFS_NFS4_FS.H */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 92a07956f07b..f670ff64b31e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7448,6 +7448,7 @@ static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler, size_t buflen, int flags) { struct nfs_access_entry cache; + int ret; if (!nfs_server_capable(inode, NFS_CAP_XATTR)) return -EOPNOTSUPP; @@ -7466,10 +7467,17 @@ static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler, return -EACCES; } - if (buf == NULL) - return nfs42_proc_removexattr(inode, key); - else - return nfs42_proc_setxattr(inode, key, buf, buflen, flags); + if (buf == NULL) { + ret = nfs42_proc_removexattr(inode, key); + if (!ret) + nfs4_xattr_cache_remove(inode, key); + } else { + ret = nfs42_proc_setxattr(inode, key, buf, buflen, flags); + if (!ret) + nfs4_xattr_cache_add(inode, key, buf, NULL, buflen); + } + + return ret; } static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler, @@ -7477,6 +7485,7 @@ static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler, const char *key, void *buf, size_t buflen) { struct nfs_access_entry cache; + ssize_t ret; if (!nfs_server_capable(inode, NFS_CAP_XATTR)) return -EOPNOTSUPP; @@ -7486,7 +7495,17 @@ static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler, return -EACCES; } - return nfs42_proc_getxattr(inode, key, buf, buflen); + ret = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (ret) + return ret; + + ret = nfs4_xattr_cache_get(inode, key, buf, buflen); + if (ret >= 0 || (ret < 0 && ret != -ENOENT)) + return ret; + + ret = nfs42_proc_getxattr(inode, key, buf, buflen); + + return ret; } static ssize_t @@ -7494,7 +7513,7 @@ nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len) { u64 cookie; bool eof; - int ret, size; + ssize_t ret, size; char *buf; size_t buflen; struct nfs_access_entry cache; @@ -7507,6 +7526,14 @@ nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len) return 0; } + ret = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (ret) + return ret; + + ret = nfs4_xattr_cache_list(inode, list, list_len); + if (ret >= 0 || (ret < 0 && ret != -ENOENT)) + return ret; + cookie = 0; eof = false; buflen = list_len ? list_len : XATTR_LIST_MAX; @@ -7526,6 +7553,9 @@ nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len) size += ret; } + if (list_len) + nfs4_xattr_cache_set_list(inode, list, size); + return size; } diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 1475f932d7da..0c1ab846b83d 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -69,6 +69,7 @@ static void nfs4_evict_inode(struct inode *inode) pnfs_destroy_layout(NFS_I(inode)); /* First call standard NFS clear_inode() code */ nfs_clear_inode(inode); + nfs4_xattr_cache_zap(inode); } struct nfs_referral_count { @@ -268,6 +269,12 @@ static int __init init_nfs_v4(void) if (err) goto out1; +#ifdef CONFIG_NFS_V4_2 + err = nfs4_xattr_cache_init(); + if (err) + goto out2; +#endif + err = nfs4_register_sysctl(); if (err) goto out2; @@ -288,6 +295,9 @@ static void __exit exit_nfs_v4(void) nfs4_pnfs_v3_ds_connect_unload(); unregister_nfs_version(&nfs_v4); +#ifdef CONFIG_NFS_V4_2 + nfs4_xattr_cache_exit(); +#endif nfs4_unregister_sysctl(); nfs_idmap_quit(); nfs_dns_resolver_destroy(); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 943ee750d68c..a2c6455ea3fa 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -102,6 +102,8 @@ struct nfs_delegation; struct posix_acl; +struct nfs4_xattr_cache; + /* * nfs fs inode data in memory */ @@ -188,6 +190,10 @@ struct nfs_inode { struct fscache_cookie *fscache; #endif struct inode vfs_inode; + +#ifdef CONFIG_NFS_V4_2 + struct nfs4_xattr_cache *xattr_cache; +#endif }; struct nfs4_copy_state { diff --git a/include/uapi/linux/nfs_fs.h b/include/uapi/linux/nfs_fs.h index 7bcc8cd6831d..3afe3767c55d 100644 --- a/include/uapi/linux/nfs_fs.h +++ b/include/uapi/linux/nfs_fs.h @@ -56,6 +56,7 @@ #define NFSDBG_PNFS 0x1000 #define NFSDBG_PNFS_LD 0x2000 #define NFSDBG_STATE 0x4000 +#define NFSDBG_XATTRCACHE 0x8000 #define NFSDBG_ALL 0xFFFF