kernel_optimize_test/net/sctp/diag.c
Eric Dumazet 70c2655849 net: silence KCSAN warnings about sk->sk_backlog.len reads
sk->sk_backlog.len can be written by BH handlers, and read
from process contexts in a lockless way.

Note the write side should also use WRITE_ONCE() or a variant.
We need some agreement about the best way to do this.

syzbot reported :

BUG: KCSAN: data-race in tcp_add_backlog / tcp_grow_window.isra.0

write to 0xffff88812665f32c of 4 bytes by interrupt on cpu 1:
 sk_add_backlog include/net/sock.h:934 [inline]
 tcp_add_backlog+0x4a0/0xcc0 net/ipv4/tcp_ipv4.c:1737
 tcp_v4_rcv+0x1aba/0x1bf0 net/ipv4/tcp_ipv4.c:1925
 ip_protocol_deliver_rcu+0x51/0x470 net/ipv4/ip_input.c:204
 ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231
 NF_HOOK include/linux/netfilter.h:305 [inline]
 NF_HOOK include/linux/netfilter.h:299 [inline]
 ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252
 dst_input include/net/dst.h:442 [inline]
 ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413
 NF_HOOK include/linux/netfilter.h:305 [inline]
 NF_HOOK include/linux/netfilter.h:299 [inline]
 ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523
 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5004
 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5118
 netif_receive_skb_internal+0x59/0x190 net/core/dev.c:5208
 napi_skb_finish net/core/dev.c:5671 [inline]
 napi_gro_receive+0x28f/0x330 net/core/dev.c:5704
 receive_buf+0x284/0x30b0 drivers/net/virtio_net.c:1061
 virtnet_receive drivers/net/virtio_net.c:1323 [inline]
 virtnet_poll+0x436/0x7d0 drivers/net/virtio_net.c:1428
 napi_poll net/core/dev.c:6352 [inline]
 net_rx_action+0x3ae/0xa50 net/core/dev.c:6418

read to 0xffff88812665f32c of 4 bytes by task 7292 on cpu 0:
 tcp_space include/net/tcp.h:1373 [inline]
 tcp_grow_window.isra.0+0x6b/0x480 net/ipv4/tcp_input.c:413
 tcp_event_data_recv+0x68f/0x990 net/ipv4/tcp_input.c:717
 tcp_rcv_established+0xbfe/0xf50 net/ipv4/tcp_input.c:5618
 tcp_v4_do_rcv+0x381/0x4e0 net/ipv4/tcp_ipv4.c:1542
 sk_backlog_rcv include/net/sock.h:945 [inline]
 __release_sock+0x135/0x1e0 net/core/sock.c:2427
 release_sock+0x61/0x160 net/core/sock.c:2943
 tcp_recvmsg+0x63b/0x1a30 net/ipv4/tcp.c:2181
 inet_recvmsg+0xbb/0x250 net/ipv4/af_inet.c:838
 sock_recvmsg_nosec net/socket.c:871 [inline]
 sock_recvmsg net/socket.c:889 [inline]
 sock_recvmsg+0x92/0xb0 net/socket.c:885
 sock_read_iter+0x15f/0x1e0 net/socket.c:967
 call_read_iter include/linux/fs.h:1864 [inline]
 new_sync_read+0x389/0x4f0 fs/read_write.c:414
 __vfs_read+0xb1/0xc0 fs/read_write.c:427
 vfs_read fs/read_write.c:461 [inline]
 vfs_read+0x143/0x2c0 fs/read_write.c:446

Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 7292 Comm: syz-fuzzer Not tainted 5.3.0+ #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
2019-10-09 21:43:00 -07:00

544 lines
14 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/* SCTP kernel implementation
* (C) Copyright Red Hat Inc. 2017
*
* This file is part of the SCTP kernel implementation
*
* These functions implement sctp diag support.
*
* Please send any bug reports or fixes you make to the
* email address(es):
* lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* Xin Long <lucien.xin@gmail.com>
*/
#include <linux/module.h>
#include <linux/inet_diag.h>
#include <linux/sock_diag.h>
#include <net/sctp/sctp.h>
/* Forward declaration: inet_sctp_diag_fill() calls this helper before its
 * definition further down in this file.
 */
static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
void *info);
/* define some functions to make asoc/ep fill look clean */
static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r,
struct sock *sk,
struct sctp_association *asoc)
{
union sctp_addr laddr, paddr;
struct dst_entry *dst;
struct timer_list *t3_rtx = &asoc->peer.primary_path->T3_rtx_timer;
laddr = list_entry(asoc->base.bind_addr.address_list.next,
struct sctp_sockaddr_entry, list)->a;
paddr = asoc->peer.primary_path->ipaddr;
dst = asoc->peer.primary_path->dst;
r->idiag_family = sk->sk_family;
r->id.idiag_sport = htons(asoc->base.bind_addr.port);
r->id.idiag_dport = htons(asoc->peer.port);
r->id.idiag_if = dst ? dst->dev->ifindex : 0;
sock_diag_save_cookie(sk, r->id.idiag_cookie);
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6) {
*(struct in6_addr *)r->id.idiag_src = laddr.v6.sin6_addr;
*(struct in6_addr *)r->id.idiag_dst = paddr.v6.sin6_addr;
} else
#endif
{
memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
r->id.idiag_src[0] = laddr.v4.sin_addr.s_addr;
r->id.idiag_dst[0] = paddr.v4.sin_addr.s_addr;
}
r->idiag_state = asoc->state;
if (timer_pending(t3_rtx)) {
r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX;
r->idiag_retrans = asoc->rtx_data_chunks;
r->idiag_expires = jiffies_to_msecs(t3_rtx->expires - jiffies);
} else {
r->idiag_timer = 0;
r->idiag_retrans = 0;
r->idiag_expires = 0;
}
}
/* Append an INET_DIAG_LOCALS attribute listing the addresses on
 * @address_list to @skb.
 *
 * Every address occupies one sockaddr_storage-sized, zero-padded slot.
 *
 * The list is walked twice: once to size the attribute and once to copy
 * the addresses.  It is iterated with the RCU list walker, and
 * sctp_ep_dump() reaches this function without taking the socket lock,
 * so the list may change between the two walks.  The copy loop is
 * therefore bounded by the number of slots that were reserved, and the
 * whole payload is cleared up front so that slots left unused by a
 * shrinking list are sent as zeroes rather than stale skb memory.
 *
 * Returns 0 on success or -EMSGSIZE when @skb has no room left.
 */
static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb,
					 struct list_head *address_list)
{
	struct sctp_sockaddr_entry *laddr;
	int addrlen = sizeof(struct sockaddr_storage);
	int addrcnt = 0;
	struct nlattr *attr;
	void *info = NULL;

	list_for_each_entry_rcu(laddr, address_list, list)
		addrcnt++;

	attr = nla_reserve(skb, INET_DIAG_LOCALS, addrlen * addrcnt);
	if (!attr)
		return -EMSGSIZE;

	info = nla_data(attr);
	/* Covers both the per-slot padding and any slot that stays unused. */
	memset(info, 0, addrlen * addrcnt);

	list_for_each_entry_rcu(laddr, address_list, list) {
		/* Never write past the slots reserved above. */
		if (addrcnt-- <= 0)
			break;

		memcpy(info, &laddr->a, sizeof(laddr->a));
		info += addrlen;
	}

	return 0;
}
/* Append an INET_DIAG_PEERS attribute to @skb holding the address of every
 * transport on @asoc's peer transport list.
 *
 * One sockaddr_storage-sized slot is reserved per transport (sized from
 * asoc->peer.transport_count); the bytes of a slot beyond the stored
 * address are zeroed.
 *
 * Returns 0 on success or -EMSGSIZE when @skb has no room left.
 */
static int inet_diag_msg_sctpaddrs_fill(struct sk_buff *skb,
					struct sctp_association *asoc)
{
	int slot_len = sizeof(struct sockaddr_storage);
	struct sctp_transport *peer;
	struct nlattr *nla;
	void *cursor;

	nla = nla_reserve(skb, INET_DIAG_PEERS,
			  slot_len * asoc->peer.transport_count);
	if (!nla)
		return -EMSGSIZE;

	cursor = nla_data(nla);
	list_for_each_entry(peer, &asoc->peer.transport_addr_list,
			    transports) {
		memcpy(cursor, &peer->ipaddr, sizeof(peer->ipaddr));
		memset(cursor + sizeof(peer->ipaddr), 0,
		       slot_len - sizeof(peer->ipaddr));
		cursor += slot_len;
	}

	return 0;
}
/* sctp asoc/ep fill*/
/* Build one sock_diag record for an SCTP endpoint or association in @skb.
 *
 * @sk:          socket the endpoint/association belongs to (must be a
 *               full socket)
 * @asoc:        association to describe, or NULL to describe the endpoint
 * @skb:         message buffer the record is appended to
 * @req:         the diag request; req->idiag_ext selects optional attributes
 * @user_ns:     user namespace handed to inet_diag_msg_attrs_fill()
 * @portid:      netlink port id placed in the message header
 * @seq:         netlink sequence number placed in the message header
 * @nlmsg_flags: netlink message flags (e.g. NLM_F_MULTI)
 * @unlh:        request header; its nlmsg_type is reused for the reply
 * @net_admin:   capability flag handed to inet_diag_msg_attrs_fill()
 *
 * Returns 0 on success.  On any failure to reserve space the partially
 * built message is cancelled and -EMSGSIZE is returned.
 */
static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc,
			       struct sk_buff *skb,
			       const struct inet_diag_req_v2 *req,
			       struct user_namespace *user_ns,
			       int portid, u32 seq, u16 nlmsg_flags,
			       const struct nlmsghdr *unlh,
			       bool net_admin)
{
	struct sctp_endpoint *ep = sctp_sk(sk)->ep;
	struct list_head *addr_list;
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	int ext = req->idiag_ext;
	struct sctp_infox infox;
	void *info = NULL;

	nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
			nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	BUG_ON(!sk_fullsock(sk));

	if (asoc) {
		inet_diag_msg_sctpasoc_fill(r, sk, asoc);
	} else {
		inet_diag_msg_common_fill(r, sk);
		r->idiag_state = sk->sk_state;
		r->idiag_timer = 0;
		r->idiag_retrans = 0;
		/* The association path always writes idiag_expires; write it
		 * here too so an endpoint record never exposes whatever the
		 * freshly reserved message payload happened to contain.
		 */
		r->idiag_expires = 0;
	}

	if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
		goto errout;

	if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) {
		u32 mem[SK_MEMINFO_VARS];
		int amt;

		/* With a per-association buffer policy, report the
		 * association's own accounting instead of the socket's.
		 */
		if (asoc && asoc->ep->sndbuf_policy)
			amt = asoc->sndbuf_used;
		else
			amt = sk_wmem_alloc_get(sk);
		mem[SK_MEMINFO_WMEM_ALLOC] = amt;

		if (asoc && asoc->ep->rcvbuf_policy)
			amt = atomic_read(&asoc->rmem_alloc);
		else
			amt = sk_rmem_alloc_get(sk);
		mem[SK_MEMINFO_RMEM_ALLOC] = amt;

		mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
		mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
		mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
		mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
		mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
		mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
		mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);

		if (nla_put(skb, INET_DIAG_SKMEMINFO, sizeof(mem), &mem) < 0)
			goto errout;
	}

	if (ext & (1 << (INET_DIAG_INFO - 1))) {
		struct nlattr *attr;

		attr = nla_reserve_64bit(skb, INET_DIAG_INFO,
					 sizeof(struct sctp_info),
					 INET_DIAG_PAD);
		if (!attr)
			goto errout;

		info = nla_data(attr);
	}

	/* info stays NULL when INET_DIAG_INFO was not requested; in that
	 * case sctp_diag_get_info() only fills the queue fields of r.
	 */
	infox.sctpinfo = (struct sctp_info *)info;
	infox.asoc = asoc;
	sctp_diag_get_info(sk, r, &infox);

	addr_list = asoc ? &asoc->base.bind_addr.address_list
			 : &ep->base.bind_addr.address_list;
	if (inet_diag_msg_sctpladdrs_fill(skb, addr_list))
		goto errout;

	if (asoc && (ext & (1 << (INET_DIAG_CONG - 1))))
		if (nla_put_string(skb, INET_DIAG_CONG, "reno") < 0)
			goto errout;

	if (asoc && inet_diag_msg_sctpaddrs_fill(skb, asoc))
		goto errout;

	nlmsg_end(skb, nlh);
	return 0;

errout:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
/* callback and param */
/* Context passed as the opaque "void *p" argument to the dump callbacks
 * in this file.
 */
struct sctp_comm_param {
/* dump_one: the request skb; dump: the skb the records are written to */
struct sk_buff *skb;
/* netlink dump state; only set by sctp_diag_dump() */
struct netlink_callback *cb;
/* the diag request being served */
const struct inet_diag_req_v2 *r;
/* request message header; only set by sctp_diag_dump_one() */
const struct nlmsghdr *nlh;
/* result of netlink_net_capable(..., CAP_NET_ADMIN) for the requester */
bool net_admin;
};
/* Estimate the payload size needed for a single-association reply.
 *
 * The result covers the fixed inet_diag_msg, the sctp_info and meminfo
 * attributes, the small scalar attributes, one sockaddr_storage slot per
 * local bound address and per peer transport, plus 64 bytes of slack.
 *
 * sctp_tsp_dump_one() calls this before it takes the socket lock (and in
 * a context where it may sleep for a GFP_KERNEL allocation), so no RCU
 * read-side section is inherited from the caller.  The RCU list walk
 * over the bind address list is therefore wrapped in its own
 * rcu_read_lock()/rcu_read_unlock() pair.
 */
static size_t inet_assoc_attr_size(struct sctp_association *asoc)
{
	int addrlen = sizeof(struct sockaddr_storage);
	int addrcnt = 0;
	struct sctp_sockaddr_entry *laddr;

	rcu_read_lock();
	list_for_each_entry_rcu(laddr, &asoc->base.bind_addr.address_list,
				list)
		addrcnt++;
	rcu_read_unlock();

	return	  nla_total_size(sizeof(struct sctp_info))
		+ nla_total_size(1) /* INET_DIAG_SHUTDOWN */
		+ nla_total_size(1) /* INET_DIAG_TOS */
		+ nla_total_size(1) /* INET_DIAG_TCLASS */
		+ nla_total_size(4) /* INET_DIAG_MARK */
		+ nla_total_size(4) /* INET_DIAG_CLASS_ID */
		+ nla_total_size(addrlen * asoc->peer.transport_count)
		+ nla_total_size(addrlen * addrcnt)
		+ nla_total_size(sizeof(struct inet_diag_meminfo))
		+ nla_total_size(sizeof(struct inet_diag_msg))
		+ 64;
}
/* Callback for sctp_transport_lookup_process(): answer a dump_one request
 * for the association that owns transport @tsp.
 *
 * @p is the struct sctp_comm_param set up by sctp_diag_dump_one().
 *
 * Returns 0 on success, the sock_diag_check_cookie() error when the
 * cookie does not match, -ENOMEM when the reply cannot be allocated, or
 * the negative error from filling or sending the reply.
 */
static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p)
{
	struct sctp_comm_param *param = p;
	struct sctp_association *asoc = tsp->asoc;
	struct sk_buff *req_skb = param->skb;
	struct net *net = sock_net(req_skb->sk);
	struct sock *sk = asoc->base.sk;
	struct sk_buff *reply;
	int rc;

	rc = sock_diag_check_cookie(sk, param->r->id.idiag_cookie);
	if (rc)
		return rc;

	reply = nlmsg_new(inet_assoc_attr_size(asoc), GFP_KERNEL);
	if (!reply)
		return -ENOMEM;

	lock_sock(sk);
	/* The association's socket may have changed before the lock was
	 * obtained (presumably through a peel-off - TODO confirm); if so,
	 * switch to the socket that owns it now.
	 */
	if (asoc->base.sk != sk) {
		release_sock(sk);
		sk = asoc->base.sk;
		lock_sock(sk);
	}
	rc = inet_sctp_diag_fill(sk, asoc, reply, param->r,
				 sk_user_ns(NETLINK_CB(req_skb).sk),
				 NETLINK_CB(req_skb).portid,
				 param->nlh->nlmsg_seq, 0, param->nlh,
				 param->net_admin);
	release_sock(sk);

	if (rc < 0) {
		/* The reply was sized by inet_assoc_attr_size(). */
		WARN_ON(rc == -EMSGSIZE);
		kfree_skb(reply);
		return rc;
	}

	rc = netlink_unicast(net->diag_nlsk, reply,
			     NETLINK_CB(req_skb).portid, MSG_DONTWAIT);

	/* A positive result is collapsed to plain success. */
	return rc > 0 ? 0 : rc;
}
/* Callback for sctp_for_each_transport(): dump the endpoint that owns @tsp
 * followed by each of its associations that matches the request.
 *
 * @p is the struct sctp_comm_param set up by sctp_diag_dump().
 *
 * Resume state kept in cb->args:
 *   args[1] - number of associations to skip (already dumped earlier)
 *   args[3] - non-zero once the endpoint record itself has been emitted
 *   args[4] - running count of associations visited in this walk
 *
 * Returns 0 when every association was handled (and the three args above
 * were reset), or 1 when the skb filled up; in that case the args are left
 * untouched so that the walk can be resumed.
 */
static int sctp_sock_dump(struct sctp_transport *tsp, void *p)
{
struct sctp_endpoint *ep = tsp->asoc->ep;
struct sctp_comm_param *commp = p;
struct sock *sk = ep->base.sk;
struct sk_buff *skb = commp->skb;
struct netlink_callback *cb = commp->cb;
const struct inet_diag_req_v2 *r = commp->r;
struct sctp_association *assoc;
int err = 0;
lock_sock(sk);
list_for_each_entry(assoc, &ep->asocs, asocs) {
/* skip associations that were already dumped before a resume */
if (cb->args[4] < cb->args[1])
goto next;
/* a non-zero port in the request acts as a filter */
if (r->id.idiag_sport != htons(assoc->base.bind_addr.port) &&
r->id.idiag_sport)
goto next;
if (r->id.idiag_dport != htons(assoc->peer.port) &&
r->id.idiag_dport)
goto next;
/* emit the endpoint record (asoc == NULL) once, ahead of the first
 * association record
 */
if (!cb->args[3] &&
inet_sctp_diag_fill(sk, NULL, skb, r,
sk_user_ns(NETLINK_CB(cb->skb).sk),
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI, cb->nlh,
commp->net_admin) < 0) {
err = 1;
goto release;
}
cb->args[3] = 1;
/* on failure args[4] has not been advanced for this association, so
 * a resumed walk retries it
 */
if (inet_sctp_diag_fill(sk, assoc, skb, r,
sk_user_ns(NETLINK_CB(cb->skb).sk),
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, 0, cb->nlh,
commp->net_admin) < 0) {
err = 1;
goto release;
}
next:
cb->args[4]++;
}
/* endpoint fully dumped: clear the per-endpoint resume state */
cb->args[1] = 0;
cb->args[3] = 0;
cb->args[4] = 0;
release:
release_sock(sk);
return err;
}
/* Filter callback for sctp_for_each_transport().
 *
 * Accepts (returns 1) a transport only when it belongs to the first
 * association on its endpoint's list, so that each endpoint is selected
 * once, and when the socket family matches the requested family (or the
 * request asks for AF_UNSPEC).  Returns 0 otherwise.
 */
static int sctp_sock_filter(struct sctp_transport *tsp, void *p)
{
	struct sctp_comm_param *param = p;
	const struct inet_diag_req_v2 *req = param->r;
	struct sctp_endpoint *endpoint = tsp->asoc->ep;
	struct sctp_association *first_asoc;

	first_asoc = list_entry(endpoint->asocs.next,
				struct sctp_association, asocs);
	if (first_asoc != tsp->asoc)
		return 0;

	if (req->sdiag_family == AF_UNSPEC)
		return 1;

	return endpoint->base.sk->sk_family == req->sdiag_family;
}
static int sctp_ep_dump(struct sctp_endpoint *ep, void *p)
{
struct sctp_comm_param *commp = p;
struct sock *sk = ep->base.sk;
struct sk_buff *skb = commp->skb;
struct netlink_callback *cb = commp->cb;
const struct inet_diag_req_v2 *r = commp->r;
struct net *net = sock_net(skb->sk);
struct inet_sock *inet = inet_sk(sk);
int err = 0;
if (!net_eq(sock_net(sk), net))
goto out;
if (cb->args[4] < cb->args[1])
goto next;
if (!(r->idiag_states & TCPF_LISTEN) && !list_empty(&ep->asocs))
goto next;
if (r->sdiag_family != AF_UNSPEC &&
sk->sk_family != r->sdiag_family)
goto next;
if (r->id.idiag_sport != inet->inet_sport &&
r->id.idiag_sport)
goto next;
if (r->id.idiag_dport != inet->inet_dport &&
r->id.idiag_dport)
goto next;
if (inet_sctp_diag_fill(sk, NULL, skb, r,
sk_user_ns(NETLINK_CB(cb->skb).sk),
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
cb->nlh, commp->net_admin) < 0) {
err = 2;
goto out;
}
next:
cb->args[4]++;
out:
return err;
}
/* define the functions for sctp_diag_handler*/
static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
void *info)
{
struct sctp_infox *infox = (struct sctp_infox *)info;
if (infox->asoc) {
r->idiag_rqueue = atomic_read(&infox->asoc->rmem_alloc);
r->idiag_wqueue = infox->asoc->sndbuf_used;
} else {
r->idiag_rqueue = sk->sk_ack_backlog;
r->idiag_wqueue = sk->sk_max_ack_backlog;
}
if (infox->sctpinfo)
sctp_get_sctp_info(sk, infox->asoc, infox->sctpinfo);
}
static int sctp_diag_dump_one(struct sk_buff *in_skb,
const struct nlmsghdr *nlh,
const struct inet_diag_req_v2 *req)
{
struct net *net = sock_net(in_skb->sk);
union sctp_addr laddr, paddr;
struct sctp_comm_param commp = {
.skb = in_skb,
.r = req,
.nlh = nlh,
.net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN),
};
if (req->sdiag_family == AF_INET) {
laddr.v4.sin_port = req->id.idiag_sport;
laddr.v4.sin_addr.s_addr = req->id.idiag_src[0];
laddr.v4.sin_family = AF_INET;
paddr.v4.sin_port = req->id.idiag_dport;
paddr.v4.sin_addr.s_addr = req->id.idiag_dst[0];
paddr.v4.sin_family = AF_INET;
} else {
laddr.v6.sin6_port = req->id.idiag_sport;
memcpy(&laddr.v6.sin6_addr, req->id.idiag_src,
sizeof(laddr.v6.sin6_addr));
laddr.v6.sin6_family = AF_INET6;
paddr.v6.sin6_port = req->id.idiag_dport;
memcpy(&paddr.v6.sin6_addr, req->id.idiag_dst,
sizeof(paddr.v6.sin6_addr));
paddr.v6.sin6_family = AF_INET6;
}
return sctp_transport_lookup_process(sctp_tsp_dump_one,
net, &laddr, &paddr, &commp);
}
static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
const struct inet_diag_req_v2 *r, struct nlattr *bc)
{
u32 idiag_states = r->idiag_states;
struct net *net = sock_net(skb->sk);
struct sctp_comm_param commp = {
.skb = skb,
.cb = cb,
.r = r,
.net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN),
};
int pos = cb->args[2];
/* eps hashtable dumps
* args:
* 0 : if it will traversal listen sock
* 1 : to record the sock pos of this time's traversal
* 4 : to work as a temporary variable to traversal list
*/
if (cb->args[0] == 0) {
if (!(idiag_states & TCPF_LISTEN))
goto skip;
if (sctp_for_each_endpoint(sctp_ep_dump, &commp))
goto done;
skip:
cb->args[0] = 1;
cb->args[1] = 0;
cb->args[4] = 0;
}
/* asocs by transport hashtable dump
* args:
* 1 : to record the assoc pos of this time's traversal
* 2 : to record the transport pos of this time's traversal
* 3 : to mark if we have dumped the ep info of the current asoc
* 4 : to work as a temporary variable to traversal list
* 5 : to save the sk we get from travelsing the tsp list.
*/
if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE)))
goto done;
sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump,
net, &pos, &commp);
cb->args[2] = pos;
done:
cb->args[1] = cb->args[4];
cb->args[4] = 0;
}
/* inet_diag handler for IPPROTO_SCTP, wiring up the dump callbacks
 * defined in this file.
 */
static const struct inet_diag_handler sctp_diag_handler = {
	.idiag_type	 = IPPROTO_SCTP,
	.idiag_info_size = sizeof(struct sctp_info),
	.idiag_get_info	 = sctp_diag_get_info,
	.dump_one	 = sctp_diag_dump_one,
	.dump		 = sctp_diag_dump,
};
/* Module entry point: register the SCTP handler with inet_diag and hand
 * back the registration result.
 */
static int __init sctp_diag_init(void)
{
	int rc;

	rc = inet_diag_register(&sctp_diag_handler);
	return rc;
}
/* Module exit point: unregister the SCTP handler from inet_diag. */
static void __exit sctp_diag_exit(void)
{
inet_diag_unregister(&sctp_diag_handler);
}
/* Module registration. */
module_init(sctp_diag_init);
module_exit(sctp_diag_exit);
MODULE_LICENSE("GPL");
/* NOTE(review): "2-132" presumably encodes AF_INET (2) and IPPROTO_SCTP
 * (132) so the module can be autoloaded on a matching sock_diag request -
 * confirm against the sock_diag module alias scheme.
 */
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-132);