kernel_optimize_test/include/net/af_unix.h
Miklos Szeredi 1fd05ba5a2 [AF_UNIX]: Rewrite garbage collector, fixes race.
Throw out the old mark & sweep garbage collector and put in a
refcounting, cycle-detecting one.

The old one had a race with recvmsg that resulted in false positives
and hence data loss.  The old algorithm operated on all unix sockets
in the system, so any additional locking would have meant performance
problems for all of their users.

The new algorithm instead operates only on "in flight" sockets, which
are very rare, and the additional locking for these doesn't negatively
impact the vast majority of users.
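The heart of the scheme is bookkeeping on the send path: every
AF_UNIX socket passed in an SCM_RIGHTS message is counted in and out
of flight, and unix_gc() then only has to scan the in-flight list for
reference cycles.  Below is a minimal sketch of that accounting, not
the patch itself; unix_get_socket(), gc_inflight_list and unix_gc_lock
are assumed to live in net/unix/garbage.c, while unix_sk() and
unix_tot_inflight come from this header:

/* Sketch only -- the real code lives in net/unix/garbage.c. */
static LIST_HEAD(gc_inflight_list);
static DEFINE_SPINLOCK(unix_gc_lock);

void unix_inflight(struct file *fp)
{
	struct sock *s = unix_get_socket(fp);	/* NULL unless fp is an AF_UNIX socket */

	if (s) {
		struct unix_sock *u = unix_sk(s);

		spin_lock(&unix_gc_lock);
		/* First in-flight reference puts the socket on the gc list */
		if (atomic_inc_return(&u->inflight) == 1)
			list_add_tail(&u->link, &gc_inflight_list);
		atomic_inc(&unix_tot_inflight);
		spin_unlock(&unix_gc_lock);
	}
}

void unix_notinflight(struct file *fp)
{
	struct sock *s = unix_get_socket(fp);

	if (s) {
		struct unix_sock *u = unix_sk(s);

		spin_lock(&unix_gc_lock);
		/* Last in-flight reference gone: off the gc list again */
		if (atomic_dec_and_test(&u->inflight))
			list_del_init(&u->link);
		atomic_dec(&unix_tot_inflight);
		spin_unlock(&unix_gc_lock);
	}
}

A socket then becomes a garbage candidate exactly when all references
to its file are in-flight ones, i.e. when nothing in userspace still
holds the descriptor.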

In fact it's probable that there weren't *any* heavy senders of
sockets over sockets; otherwise the above race would have been
discovered long ago.

The patch works OK with the app that exposed the race with the old
code.  The garbage collection has also been verified to work in a few
simple cases.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
2007-07-11 14:22:39 -07:00

#ifndef __LINUX_NET_AFUNIX_H
#define __LINUX_NET_AFUNIX_H

#include <linux/socket.h>
#include <linux/un.h>
#include <linux/mutex.h>
#include <net/sock.h>

extern void unix_inflight(struct file *fp);
extern void unix_notinflight(struct file *fp);
extern void unix_gc(void);

#define UNIX_HASH_SIZE	256

extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
extern spinlock_t unix_table_lock;
extern atomic_t unix_tot_inflight;
static inline struct sock *first_unix_socket(int *i)
{
	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
		if (!hlist_empty(&unix_socket_table[*i]))
			return __sk_head(&unix_socket_table[*i]);
	}
	return NULL;
}
static inline struct sock *next_unix_socket(int *i, struct sock *s)
{
	struct sock *next = sk_next(s);

	/* More in this chain? */
	if (next)
		return next;
	/* Look for next non-empty chain. */
	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
		if (!hlist_empty(&unix_socket_table[*i]))
			return __sk_head(&unix_socket_table[*i]);
	}
	return NULL;
}
#define forall_unix_sockets(i, s) \
	for (s = first_unix_socket(&(i)); s; s = next_unix_socket(&(i), (s)))
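/*
 * Illustrative use only (not part of the original header): any walk
 * over all AF_UNIX sockets must hold unix_table_lock, e.g.:
 *
 *	int i;
 *	struct sock *s;
 *
 *	spin_lock(&unix_table_lock);
 *	forall_unix_sockets(i, s)
 *		printk(KERN_DEBUG "unix socket at %p\n", s);
 *	spin_unlock(&unix_table_lock);
 */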
struct unix_address {
	atomic_t	refcnt;
	int		len;
	unsigned	hash;
	struct sockaddr_un name[0];
};
struct unix_skb_parms {
	struct ucred		creds;	/* Skb credentials	*/
	struct scm_fp_list	*fp;	/* Passed files		*/
#ifdef CONFIG_SECURITY_NETWORK
	u32			secid;	/* Security ID		*/
#endif
};
#define UNIXCB(skb)	(*(struct unix_skb_parms *)&((skb)->cb))
#define UNIXCREDS(skb)	(&UNIXCB((skb)).creds)
#define UNIXSID(skb)	(&UNIXCB((skb)).secid)
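/*
 * Illustrative use only: a sender stashes credentials and any passed
 * file descriptors in skb->cb through these accessors, roughly:
 *
 *	UNIXCB(skb).creds = scm->creds;
 *	UNIXCB(skb).fp = scm->fp ? scm_fp_dup(scm->fp) : NULL;
 */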
#define unix_state_lock(s)	spin_lock(&unix_sk(s)->lock)
#define unix_state_unlock(s)	spin_unlock(&unix_sk(s)->lock)
#define unix_state_lock_nested(s) \
				spin_lock_nested(&unix_sk(s)->lock, \
				SINGLE_DEPTH_NESTING)
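/*
 * Illustrative only: both ends of a connection use the same lock
 * class, so when two of these locks must be held at once (as on the
 * connect path) the second is taken with the _nested variant so that
 * lockdep doesn't flag the double acquisition:
 *
 *	unix_state_lock(other);
 *	...
 *	unix_state_lock_nested(sk);
 *	...
 *	unix_state_unlock(sk);
 *	unix_state_unlock(other);
 */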
#ifdef __KERNEL__
/* The AF_UNIX socket */
struct unix_sock {
	/* WARNING: sk has to be the first member */
	struct sock		sk;
	struct unix_address	*addr;		/* bound address, if any */
	struct dentry		*dentry;	/* filesystem object for a path-bound socket */
	struct vfsmount		*mnt;
	struct mutex		readlock;	/* serializes readers of the receive queue */
	struct sock		*peer;
	struct sock		*other;
	struct list_head	link;		/* chain in the gc inflight/candidate lists */
	atomic_t		inflight;	/* in-flight SCM_RIGHTS references to this socket */
	spinlock_t		lock;		/* socket state lock (see unix_state_lock) */
	unsigned int		gc_candidate : 1; /* set by unix_gc() while scanning for cycles */
	wait_queue_head_t	peer_wait;	/* senders wait here while the peer's queue is full */
};
#define unix_sk(__sk) ((struct unix_sock *)__sk)

#ifdef CONFIG_SYSCTL
extern int sysctl_unix_max_dgram_qlen;
extern void unix_sysctl_register(void);
extern void unix_sysctl_unregister(void);
#else
static inline void unix_sysctl_register(void) {}
static inline void unix_sysctl_unregister(void) {}
#endif
#endif	/* __KERNEL__ */
#endif	/* __LINUX_NET_AFUNIX_H */