kernel_optimize_test/net/phonet/socket.c
Eric Dumazet 4381548237 net: sock_def_readable() and friends RCU conversion
sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we
need two atomic operations (and associated dirtying) per incoming
packet.

RCU conversion is pretty much needed :

1) Add a new structure, called "struct socket_wq" to hold all fields
that will need rcu_read_lock() protection (currently: a
wait_queue_head_t and a struct fasync_struct pointer).

[Future patch will add a list anchor for wakeup coalescing]

2) Attach one of such structure to each "struct socket" created in
sock_alloc_inode().

3) Respect RCU grace period when freeing a "struct socket_wq"

4) Change sk_sleep pointer in "struct sock" by sk_wq, pointer to "struct
socket_wq"

5) Change sk_sleep() function to use new sk->sk_wq instead of
sk->sk_sleep

6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside
a rcu_read_lock() section.

7) Change all sk_has_sleeper() callers to :
  - Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock)
  - Use wq_has_sleeper() to eventually wakeup tasks.
  - Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock)

8) sock_wake_async() is modified to use rcu protection as well.

9) Exceptions :
  macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq"
instead of dynamically allocated ones. They dont need rcu freeing.

Some cleanups or followups are probably needed, (possible
sk_callback_lock conversion to a spinlock for example...).

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-05-01 15:00:15 -07:00

566 lines
13 KiB
C

/*
* File: socket.c
*
* Phonet sockets
*
* Copyright (C) 2008 Nokia Corporation.
*
* Contact: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
* Original author: Sakari Ailus <sakari.ailus@nokia.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*/
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/net.h>
#include <linux/poll.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <linux/phonet.h>
#include <net/phonet/phonet.h>
#include <net/phonet/pep.h>
#include <net/phonet/pn_dev.h>
static int pn_socket_release(struct socket *sock)
{
struct sock *sk = sock->sk;
if (sk) {
sock->sk = NULL;
sk->sk_prot->close(sk, 0);
}
return 0;
}
#define PN_HASHSIZE 16
#define PN_HASHMASK (PN_HASHSIZE-1)
static struct {
struct hlist_head hlist[PN_HASHSIZE];
spinlock_t lock;
} pnsocks;
void __init pn_sock_init(void)
{
unsigned i;
for (i = 0; i < PN_HASHSIZE; i++)
INIT_HLIST_HEAD(pnsocks.hlist + i);
spin_lock_init(&pnsocks.lock);
}
static struct hlist_head *pn_hash_list(u16 obj)
{
return pnsocks.hlist + (obj & PN_HASHMASK);
}
/*
* Find address based on socket address, match only certain fields.
* Also grab sock if it was found. Remember to sock_put it later.
*/
struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn)
{
struct hlist_node *node;
struct sock *sknode;
struct sock *rval = NULL;
u16 obj = pn_sockaddr_get_object(spn);
u8 res = spn->spn_resource;
struct hlist_head *hlist = pn_hash_list(obj);
spin_lock_bh(&pnsocks.lock);
sk_for_each(sknode, node, hlist) {
struct pn_sock *pn = pn_sk(sknode);
BUG_ON(!pn->sobject); /* unbound socket */
if (!net_eq(sock_net(sknode), net))
continue;
if (pn_port(obj)) {
/* Look up socket by port */
if (pn_port(pn->sobject) != pn_port(obj))
continue;
} else {
/* If port is zero, look up by resource */
if (pn->resource != res)
continue;
}
if (pn_addr(pn->sobject) &&
pn_addr(pn->sobject) != pn_addr(obj))
continue;
rval = sknode;
sock_hold(sknode);
break;
}
spin_unlock_bh(&pnsocks.lock);
return rval;
}
/* Deliver a broadcast packet (only in bottom-half) */
void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb)
{
struct hlist_head *hlist = pnsocks.hlist;
unsigned h;
spin_lock(&pnsocks.lock);
for (h = 0; h < PN_HASHSIZE; h++) {
struct hlist_node *node;
struct sock *sknode;
sk_for_each(sknode, node, hlist) {
struct sk_buff *clone;
if (!net_eq(sock_net(sknode), net))
continue;
if (!sock_flag(sknode, SOCK_BROADCAST))
continue;
clone = skb_clone(skb, GFP_ATOMIC);
if (clone) {
sock_hold(sknode);
sk_receive_skb(sknode, clone, 0);
}
}
hlist++;
}
spin_unlock(&pnsocks.lock);
}
void pn_sock_hash(struct sock *sk)
{
struct hlist_head *hlist = pn_hash_list(pn_sk(sk)->sobject);
spin_lock_bh(&pnsocks.lock);
sk_add_node(sk, hlist);
spin_unlock_bh(&pnsocks.lock);
}
EXPORT_SYMBOL(pn_sock_hash);
void pn_sock_unhash(struct sock *sk)
{
spin_lock_bh(&pnsocks.lock);
sk_del_node_init(sk);
spin_unlock_bh(&pnsocks.lock);
}
EXPORT_SYMBOL(pn_sock_unhash);
static DEFINE_MUTEX(port_mutex);
static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len)
{
struct sock *sk = sock->sk;
struct pn_sock *pn = pn_sk(sk);
struct sockaddr_pn *spn = (struct sockaddr_pn *)addr;
int err;
u16 handle;
u8 saddr;
if (sk->sk_prot->bind)
return sk->sk_prot->bind(sk, addr, len);
if (len < sizeof(struct sockaddr_pn))
return -EINVAL;
if (spn->spn_family != AF_PHONET)
return -EAFNOSUPPORT;
handle = pn_sockaddr_get_object((struct sockaddr_pn *)addr);
saddr = pn_addr(handle);
if (saddr && phonet_address_lookup(sock_net(sk), saddr))
return -EADDRNOTAVAIL;
lock_sock(sk);
if (sk->sk_state != TCP_CLOSE || pn_port(pn->sobject)) {
err = -EINVAL; /* attempt to rebind */
goto out;
}
WARN_ON(sk_hashed(sk));
mutex_lock(&port_mutex);
err = sk->sk_prot->get_port(sk, pn_port(handle));
if (err)
goto out_port;
/* get_port() sets the port, bind() sets the address if applicable */
pn->sobject = pn_object(saddr, pn_port(pn->sobject));
pn->resource = spn->spn_resource;
/* Enable RX on the socket */
sk->sk_prot->hash(sk);
out_port:
mutex_unlock(&port_mutex);
out:
release_sock(sk);
return err;
}
static int pn_socket_autobind(struct socket *sock)
{
struct sockaddr_pn sa;
int err;
memset(&sa, 0, sizeof(sa));
sa.spn_family = AF_PHONET;
err = pn_socket_bind(sock, (struct sockaddr *)&sa,
sizeof(struct sockaddr_pn));
if (err != -EINVAL)
return err;
BUG_ON(!pn_port(pn_sk(sock->sk)->sobject));
return 0; /* socket was already bound */
}
static int pn_socket_accept(struct socket *sock, struct socket *newsock,
int flags)
{
struct sock *sk = sock->sk;
struct sock *newsk;
int err;
newsk = sk->sk_prot->accept(sk, flags, &err);
if (!newsk)
return err;
lock_sock(newsk);
sock_graft(newsk, newsock);
newsock->state = SS_CONNECTED;
release_sock(newsk);
return 0;
}
static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
int *sockaddr_len, int peer)
{
struct sock *sk = sock->sk;
struct pn_sock *pn = pn_sk(sk);
memset(addr, 0, sizeof(struct sockaddr_pn));
addr->sa_family = AF_PHONET;
if (!peer) /* Race with bind() here is userland's problem. */
pn_sockaddr_set_object((struct sockaddr_pn *)addr,
pn->sobject);
*sockaddr_len = sizeof(struct sockaddr_pn);
return 0;
}
static unsigned int pn_socket_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
struct pep_sock *pn = pep_sk(sk);
unsigned int mask = 0;
poll_wait(file, sk_sleep(sk), wait);
switch (sk->sk_state) {
case TCP_LISTEN:
return hlist_empty(&pn->ackq) ? 0 : POLLIN;
case TCP_CLOSE:
return POLLERR;
}
if (!skb_queue_empty(&sk->sk_receive_queue))
mask |= POLLIN | POLLRDNORM;
if (!skb_queue_empty(&pn->ctrlreq_queue))
mask |= POLLPRI;
if (!mask && sk->sk_state == TCP_CLOSE_WAIT)
return POLLHUP;
if (sk->sk_state == TCP_ESTABLISHED && atomic_read(&pn->tx_credits))
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
return mask;
}
static int pn_socket_ioctl(struct socket *sock, unsigned int cmd,
unsigned long arg)
{
struct sock *sk = sock->sk;
struct pn_sock *pn = pn_sk(sk);
if (cmd == SIOCPNGETOBJECT) {
struct net_device *dev;
u16 handle;
u8 saddr;
if (get_user(handle, (__u16 __user *)arg))
return -EFAULT;
lock_sock(sk);
if (sk->sk_bound_dev_if)
dev = dev_get_by_index(sock_net(sk),
sk->sk_bound_dev_if);
else
dev = phonet_device_get(sock_net(sk));
if (dev && (dev->flags & IFF_UP))
saddr = phonet_address_get(dev, pn_addr(handle));
else
saddr = PN_NO_ADDR;
release_sock(sk);
if (dev)
dev_put(dev);
if (saddr == PN_NO_ADDR)
return -EHOSTUNREACH;
handle = pn_object(saddr, pn_port(pn->sobject));
return put_user(handle, (__u16 __user *)arg);
}
return sk->sk_prot->ioctl(sk, cmd, arg);
}
static int pn_socket_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
int err = 0;
if (sock->state != SS_UNCONNECTED)
return -EINVAL;
if (pn_socket_autobind(sock))
return -ENOBUFS;
lock_sock(sk);
if (sk->sk_state != TCP_CLOSE) {
err = -EINVAL;
goto out;
}
sk->sk_state = TCP_LISTEN;
sk->sk_ack_backlog = 0;
sk->sk_max_ack_backlog = backlog;
out:
release_sock(sk);
return err;
}
static int pn_socket_sendmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *m, size_t total_len)
{
struct sock *sk = sock->sk;
if (pn_socket_autobind(sock))
return -EAGAIN;
return sk->sk_prot->sendmsg(iocb, sk, m, total_len);
}
const struct proto_ops phonet_dgram_ops = {
.family = AF_PHONET,
.owner = THIS_MODULE,
.release = pn_socket_release,
.bind = pn_socket_bind,
.connect = sock_no_connect,
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = pn_socket_getname,
.poll = datagram_poll,
.ioctl = pn_socket_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = sock_no_setsockopt,
.getsockopt = sock_no_getsockopt,
#ifdef CONFIG_COMPAT
.compat_setsockopt = sock_no_setsockopt,
.compat_getsockopt = sock_no_getsockopt,
#endif
.sendmsg = pn_socket_sendmsg,
.recvmsg = sock_common_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};
const struct proto_ops phonet_stream_ops = {
.family = AF_PHONET,
.owner = THIS_MODULE,
.release = pn_socket_release,
.bind = pn_socket_bind,
.connect = sock_no_connect,
.socketpair = sock_no_socketpair,
.accept = pn_socket_accept,
.getname = pn_socket_getname,
.poll = pn_socket_poll,
.ioctl = pn_socket_ioctl,
.listen = pn_socket_listen,
.shutdown = sock_no_shutdown,
.setsockopt = sock_common_setsockopt,
.getsockopt = sock_common_getsockopt,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
#endif
.sendmsg = pn_socket_sendmsg,
.recvmsg = sock_common_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};
EXPORT_SYMBOL(phonet_stream_ops);
/* allocate port for a socket */
int pn_sock_get_port(struct sock *sk, unsigned short sport)
{
static int port_cur;
struct net *net = sock_net(sk);
struct pn_sock *pn = pn_sk(sk);
struct sockaddr_pn try_sa;
struct sock *tmpsk;
memset(&try_sa, 0, sizeof(struct sockaddr_pn));
try_sa.spn_family = AF_PHONET;
WARN_ON(!mutex_is_locked(&port_mutex));
if (!sport) {
/* search free port */
int port, pmin, pmax;
phonet_get_local_port_range(&pmin, &pmax);
for (port = pmin; port <= pmax; port++) {
port_cur++;
if (port_cur < pmin || port_cur > pmax)
port_cur = pmin;
pn_sockaddr_set_port(&try_sa, port_cur);
tmpsk = pn_find_sock_by_sa(net, &try_sa);
if (tmpsk == NULL) {
sport = port_cur;
goto found;
} else
sock_put(tmpsk);
}
} else {
/* try to find specific port */
pn_sockaddr_set_port(&try_sa, sport);
tmpsk = pn_find_sock_by_sa(net, &try_sa);
if (tmpsk == NULL)
/* No sock there! We can use that port... */
goto found;
else
sock_put(tmpsk);
}
/* the port must be in use already */
return -EADDRINUSE;
found:
pn->sobject = pn_object(pn_addr(pn->sobject), sport);
return 0;
}
EXPORT_SYMBOL(pn_sock_get_port);
#ifdef CONFIG_PROC_FS
static struct sock *pn_sock_get_idx(struct seq_file *seq, loff_t pos)
{
struct net *net = seq_file_net(seq);
struct hlist_head *hlist = pnsocks.hlist;
struct hlist_node *node;
struct sock *sknode;
unsigned h;
for (h = 0; h < PN_HASHSIZE; h++) {
sk_for_each(sknode, node, hlist) {
if (!net_eq(net, sock_net(sknode)))
continue;
if (!pos)
return sknode;
pos--;
}
hlist++;
}
return NULL;
}
static struct sock *pn_sock_get_next(struct seq_file *seq, struct sock *sk)
{
struct net *net = seq_file_net(seq);
do
sk = sk_next(sk);
while (sk && !net_eq(net, sock_net(sk)));
return sk;
}
static void *pn_sock_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(pnsocks.lock)
{
spin_lock_bh(&pnsocks.lock);
return *pos ? pn_sock_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
static void *pn_sock_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct sock *sk;
if (v == SEQ_START_TOKEN)
sk = pn_sock_get_idx(seq, 0);
else
sk = pn_sock_get_next(seq, v);
(*pos)++;
return sk;
}
static void pn_sock_seq_stop(struct seq_file *seq, void *v)
__releases(pnsocks.lock)
{
spin_unlock_bh(&pnsocks.lock);
}
static int pn_sock_seq_show(struct seq_file *seq, void *v)
{
int len;
if (v == SEQ_START_TOKEN)
seq_printf(seq, "%s%n", "pt loc rem rs st tx_queue rx_queue "
" uid inode ref pointer drops", &len);
else {
struct sock *sk = v;
struct pn_sock *pn = pn_sk(sk);
seq_printf(seq, "%2d %04X:%04X:%02X %02X %08X:%08X %5d %lu "
"%d %p %d%n",
sk->sk_protocol, pn->sobject, 0, pn->resource,
sk->sk_state,
sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk),
sock_i_uid(sk), sock_i_ino(sk),
atomic_read(&sk->sk_refcnt), sk,
atomic_read(&sk->sk_drops), &len);
}
seq_printf(seq, "%*s\n", 127 - len, "");
return 0;
}
static const struct seq_operations pn_sock_seq_ops = {
.start = pn_sock_seq_start,
.next = pn_sock_seq_next,
.stop = pn_sock_seq_stop,
.show = pn_sock_seq_show,
};
static int pn_sock_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &pn_sock_seq_ops,
sizeof(struct seq_net_private));
}
const struct file_operations pn_sock_seq_fops = {
.owner = THIS_MODULE,
.open = pn_sock_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_net,
};
#endif