Merge branch 'master' of git://1984.lsi.us.es/nf-next

Pablo Neira Ayuso says: ==================== * Remove the limitation on the maximum number of supported sets in ipset. Now ipset automagically increments the number of slots in the array of sets by 64 new spare slots, from Jozsef Kadlecsik. * Partially remove the generic queue infrastructure now that ip_queue is gone. Its only client is nfnetlink_queue now, from Florian Westphal. * Add missing attribute policy checks in ctnetlink, from Florian Westphal. * Automagically kill conntrack entries that use the wrong output interface for the masquerading case in case of routing changes, from Jozsef Kadlecsik. * Two patches to improve ct object traceability. Now ct objects are always placed on one of the existing lists. This allows us to dump the content of unconfirmed and dying conntracks via ctnetlink as a way to provide more instrumentation in case you suspect leaks, from myself. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2012-12-04 13:01:19 -05:00 · 2012-12-04 13:01:19 -05:00 · e8ad1a8fab
commit e8ad1a8fab
parent 099f7aa740 a0ecb85a2c
13 changed files with 332 additions and 259 deletions
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@ -182,7 +182,7 @@ __nf_conntrack_find(struct net *net, u16 zone,

 extern int nf_conntrack_hash_check_insert(struct nf_conn *ct);
 extern void nf_ct_delete_from_lists(struct nf_conn *ct);
-extern void nf_ct_insert_dying_list(struct nf_conn *ct);
+extern void nf_ct_dying_timeout(struct nf_conn *ct);

 extern void nf_conntrack_flush_report(struct net *net, u32 pid, int report);

--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@ -68,4 +68,19 @@ static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct)
 #endif
 }

+static inline bool nf_nat_oif_changed(unsigned int hooknum,
+				      enum ip_conntrack_info ctinfo,
+				      struct nf_conn_nat *nat,
+				      const struct net_device *out)
+{
+#if IS_ENABLED(CONFIG_IP_NF_TARGET_MASQUERADE) || \
+    IS_ENABLED(CONFIG_IP6_NF_TARGET_MASQUERADE)
+	return nat->masq_index && hooknum == NF_INET_POST_ROUTING &&
+	       CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL &&
+	       nat->masq_index != out->ifindex;
+#else
+	return false;
+#endif
+}
+
 #endif
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@ -21,14 +21,10 @@ struct nf_queue_entry {
 struct nf_queue_handler {
 	int			(*outfn)(struct nf_queue_entry *entry,
 					 unsigned int queuenum);
-	char			*name;
 };

-extern int nf_register_queue_handler(u_int8_t pf,
-				     const struct nf_queue_handler *qh);
-extern int nf_unregister_queue_handler(u_int8_t pf,
-				       const struct nf_queue_handler *qh);
-extern void nf_unregister_queue_handlers(const struct nf_queue_handler *qh);
+void nf_register_queue_handler(const struct nf_queue_handler *qh);
+void nf_unregister_queue_handler(void);
 extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);

 #endif /* _NF_QUEUE_H */
--- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
@ -9,6 +9,8 @@ enum cntl_msg_types {
 	IPCTNL_MSG_CT_GET_CTRZERO,
 	IPCTNL_MSG_CT_GET_STATS_CPU,
 	IPCTNL_MSG_CT_GET_STATS,
+	IPCTNL_MSG_CT_GET_DYING,
+	IPCTNL_MSG_CT_GET_UNCONFIRMED,

 	IPCTNL_MSG_MAX
 };
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@ -134,6 +134,10 @@ nf_nat_ipv4_fn(unsigned int hooknum,
 		/* ESTABLISHED */
 		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
 			     ctinfo == IP_CT_ESTABLISHED_REPLY);
+		if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) {
+			nf_ct_kill_acct(ct, ctinfo, skb);
+			return NF_DROP;
+		}
 	}

 	return nf_nat_packet(ct, ctinfo, hooknum, skb);
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@ -137,6 +137,10 @@ nf_nat_ipv6_fn(unsigned int hooknum,
 		/* ESTABLISHED */
 		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
 			     ctinfo == IP_CT_ESTABLISHED_REPLY);
+		if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) {
+			nf_ct_kill_acct(ct, ctinfo, skb);
+			return NF_DROP;
+		}
 	}

 	return nf_nat_packet(ct, ctinfo, hooknum, skb);
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@ -295,8 +295,6 @@ void __init netfilter_init(void)
 		panic("cannot create netfilter proc entry");
 #endif

-	if (netfilter_queue_init() < 0)
-		panic("cannot initialize nf_queue");
 	if (netfilter_log_init() < 0)
 		panic("cannot initialize nf_log");
 }
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@ -28,9 +28,10 @@ static LIST_HEAD(ip_set_type_list);		/* all registered set types */
 static DEFINE_MUTEX(ip_set_type_mutex);		/* protects ip_set_type_list */
 static DEFINE_RWLOCK(ip_set_ref_lock);		/* protects the set refs */

-static struct ip_set **ip_set_list;		/* all individual sets */
+static struct ip_set * __rcu *ip_set_list;	/* all individual sets */
 static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */

+#define IP_SET_INC	64
 #define STREQ(a, b)	(strncmp(a, b, IPSET_MAXNAMELEN) == 0)

 static unsigned int max_sets;
@ -42,6 +43,12 @@ MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
 MODULE_DESCRIPTION("core IP set support");
 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);

+/* When the nfnl mutex is held: */
+#define nfnl_dereference(p)		\
+	rcu_dereference_protected(p, 1)
+#define nfnl_set(id)			\
+	nfnl_dereference(ip_set_list)[id]
+
 /*
 * The set types are implemented in modules and registered set types
 * can be found in ip_set_type_list. Adding/deleting types is
@ -321,19 +328,19 @@ EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
 */

 static inline void
-__ip_set_get(ip_set_id_t index)
+__ip_set_get(struct ip_set *set)
 {
 	write_lock_bh(&ip_set_ref_lock);
-	ip_set_list[index]->ref++;
+	set->ref++;
 	write_unlock_bh(&ip_set_ref_lock);
 }

 static inline void
-__ip_set_put(ip_set_id_t index)
+__ip_set_put(struct ip_set *set)
 {
 	write_lock_bh(&ip_set_ref_lock);
-	BUG_ON(ip_set_list[index]->ref == 0);
-	ip_set_list[index]->ref--;
+	BUG_ON(set->ref == 0);
+	set->ref--;
 	write_unlock_bh(&ip_set_ref_lock);
 }

@ -344,12 +351,25 @@ __ip_set_put(ip_set_id_t index)
 * so it can't be destroyed (or changed) under our foot.
 */

+static inline struct ip_set *
+ip_set_rcu_get(ip_set_id_t index)
+{
+	struct ip_set *set;
+
+	rcu_read_lock();
+	/* ip_set_list itself needs to be protected */
+	set = rcu_dereference(ip_set_list)[index];
+	rcu_read_unlock();
+
+	return set;
+}
+
 int
 ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
 	    const struct xt_action_param *par,
 	    const struct ip_set_adt_opt *opt)
 {
-	struct ip_set *set = ip_set_list[index];
+	struct ip_set *set = ip_set_rcu_get(index);
 	int ret = 0;

 	BUG_ON(set == NULL);
@ -388,7 +408,7 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
 	   const struct xt_action_param *par,
 	   const struct ip_set_adt_opt *opt)
 {
-	struct ip_set *set = ip_set_list[index];
+	struct ip_set *set = ip_set_rcu_get(index);
 	int ret;

 	BUG_ON(set == NULL);
@ -411,7 +431,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
 	   const struct xt_action_param *par,
 	   const struct ip_set_adt_opt *opt)
 {
-	struct ip_set *set = ip_set_list[index];
+	struct ip_set *set = ip_set_rcu_get(index);
 	int ret = 0;

 	BUG_ON(set == NULL);
@ -440,14 +460,17 @@ ip_set_get_byname(const char *name, struct ip_set **set)
 	ip_set_id_t i, index = IPSET_INVALID_ID;
 	struct ip_set *s;

+	rcu_read_lock();
 	for (i = 0; i < ip_set_max; i++) {
-		s = ip_set_list[i];
+		s = rcu_dereference(ip_set_list)[i];
 		if (s != NULL && STREQ(s->name, name)) {
-			__ip_set_get(i);
+			__ip_set_get(s);
 			index = i;
 			*set = s;
+			break;
 		}
 	}
+	rcu_read_unlock();

 	return index;
 }
@ -462,8 +485,13 @@ EXPORT_SYMBOL_GPL(ip_set_get_byname);
 void
 ip_set_put_byindex(ip_set_id_t index)
 {
-	if (ip_set_list[index] != NULL)
-		__ip_set_put(index);
+	struct ip_set *set;
+
+	rcu_read_lock();
+	set = rcu_dereference(ip_set_list)[index];
+	if (set != NULL)
+		__ip_set_put(set);
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(ip_set_put_byindex);

@ -477,7 +505,7 @@ EXPORT_SYMBOL_GPL(ip_set_put_byindex);
 const char *
 ip_set_name_byindex(ip_set_id_t index)
 {
-	const struct ip_set *set = ip_set_list[index];
+	const struct ip_set *set = ip_set_rcu_get(index);

 	BUG_ON(set == NULL);
 	BUG_ON(set->ref == 0);
@ -501,11 +529,18 @@ EXPORT_SYMBOL_GPL(ip_set_name_byindex);
 ip_set_id_t
 ip_set_nfnl_get(const char *name)
 {
+	ip_set_id_t i, index = IPSET_INVALID_ID;
 	struct ip_set *s;
-	ip_set_id_t index;

 	nfnl_lock();
-	index = ip_set_get_byname(name, &s);
+	for (i = 0; i < ip_set_max; i++) {
+		s = nfnl_set(i);
+		if (s != NULL && STREQ(s->name, name)) {
+			__ip_set_get(s);
+			index = i;
+			break;
+		}
+	}
 	nfnl_unlock();

 	return index;
@ -521,12 +556,15 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_get);
 ip_set_id_t
 ip_set_nfnl_get_byindex(ip_set_id_t index)
 {
+	struct ip_set *set;
+
 	if (index > ip_set_max)
 		return IPSET_INVALID_ID;

 	nfnl_lock();
-	if (ip_set_list[index])
-		__ip_set_get(index);
+	set = nfnl_set(index);
+	if (set)
+		__ip_set_get(set);
 	else
 		index = IPSET_INVALID_ID;
 	nfnl_unlock();
@ -545,8 +583,11 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
 void
 ip_set_nfnl_put(ip_set_id_t index)
 {
+	struct ip_set *set;
 	nfnl_lock();
-	ip_set_put_byindex(index);
+	set = nfnl_set(index);
+	if (set != NULL)
+		__ip_set_put(set);
 	nfnl_unlock();
 }
 EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
@ -603,41 +644,46 @@ static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = {
 	[IPSET_ATTR_DATA]	= { .type = NLA_NESTED },
 };

-static ip_set_id_t
-find_set_id(const char *name)
+static struct ip_set *
+find_set_and_id(const char *name, ip_set_id_t *id)
 {
-	ip_set_id_t i, index = IPSET_INVALID_ID;
-	const struct ip_set *set;
+	struct ip_set *set = NULL;
+	ip_set_id_t i;

-	for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) {
-		set = ip_set_list[i];
-		if (set != NULL && STREQ(set->name, name))
-			index = i;
+	*id = IPSET_INVALID_ID;
+	for (i = 0; i < ip_set_max; i++) {
+		set = nfnl_set(i);
+		if (set != NULL && STREQ(set->name, name)) {
+			*id = i;
+			break;
+		}
 	}
-	return index;
+	return (*id == IPSET_INVALID_ID ? NULL : set);
 }

 static inline struct ip_set *
 find_set(const char *name)
 {
-	ip_set_id_t index = find_set_id(name);
+	ip_set_id_t id;

-	return index == IPSET_INVALID_ID ? NULL : ip_set_list[index];
+	return find_set_and_id(name, &id);
 }

 static int
 find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set)
 {
+	struct ip_set *s;
 	ip_set_id_t i;

 	*index = IPSET_INVALID_ID;
 	for (i = 0;  i < ip_set_max; i++) {
-		if (ip_set_list[i] == NULL) {
+		s = nfnl_set(i);
+		if (s == NULL) {
 			if (*index == IPSET_INVALID_ID)
 				*index = i;
-		} else if (STREQ(name, ip_set_list[i]->name)) {
+		} else if (STREQ(name, s->name)) {
 			/* Name clash */
-			*set = ip_set_list[i];
+			*set = s;
 			return -EEXIST;
 		}
 	}
@ -730,10 +776,9 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 	 * and check clashing.
 	 */
 	ret = find_free_id(set->name, &index, &clash);
-	if (ret != 0) {
+	if (ret == -EEXIST) {
 		/* If this is the same set and requested, ignore error */
-		if (ret == -EEXIST &&
-		    (flags & IPSET_FLAG_EXIST) &&
+		if ((flags & IPSET_FLAG_EXIST) &&
 		    STREQ(set->type->name, clash->type->name) &&
 		    set->type->family == clash->type->family &&
 		    set->type->revision_min == clash->type->revision_min &&
@ -741,13 +786,36 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 		    set->variant->same_set(set, clash))
 			ret = 0;
 		goto cleanup;
-	}
+	} else if (ret == -IPSET_ERR_MAX_SETS) {
+		struct ip_set **list, **tmp;
+		ip_set_id_t i = ip_set_max + IP_SET_INC;
+
+		if (i < ip_set_max || i == IPSET_INVALID_ID)
+			/* Wraparound */
+			goto cleanup;
+
+		list = kzalloc(sizeof(struct ip_set *) * i, GFP_KERNEL);
+		if (!list)
+			goto cleanup;
+		/* nfnl mutex is held, both lists are valid */
+		tmp = nfnl_dereference(ip_set_list);
+		memcpy(list, tmp, sizeof(struct ip_set *) * ip_set_max);
+		rcu_assign_pointer(ip_set_list, list);
+		/* Make sure all current packets have passed through */
+		synchronize_net();
+		/* Use new list */
+		index = ip_set_max;
+		ip_set_max = i;
+		kfree(tmp);
+		ret = 0;
+	} else if (ret)
+		goto cleanup;

 	/*
 	 * Finally! Add our shiny new set to the list, and be done.
 	 */
 	pr_debug("create: '%s' created with index %u!\n", set->name, index);
-	ip_set_list[index] = set;
+	nfnl_set(index) = set;

 	return ret;

@ -772,10 +840,10 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
 static void
 ip_set_destroy_set(ip_set_id_t index)
 {
-	struct ip_set *set = ip_set_list[index];
+	struct ip_set *set = nfnl_set(index);

 	pr_debug("set: %s\n",  set->name);
-	ip_set_list[index] = NULL;
+	nfnl_set(index) = NULL;

 	/* Must call it without holding any lock */
 	set->variant->destroy(set);
@ -788,6 +856,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
 	       const struct nlmsghdr *nlh,
 	       const struct nlattr * const attr[])
 {
+	struct ip_set *s;
 	ip_set_id_t i;
 	int ret = 0;

@ -807,22 +876,24 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
 	read_lock_bh(&ip_set_ref_lock);
 	if (!attr[IPSET_ATTR_SETNAME]) {
 		for (i = 0; i < ip_set_max; i++) {
-			if (ip_set_list[i] != NULL && ip_set_list[i]->ref) {
+			s = nfnl_set(i);
+			if (s != NULL && s->ref) {
 				ret = -IPSET_ERR_BUSY;
 				goto out;
 			}
 		}
 		read_unlock_bh(&ip_set_ref_lock);
 		for (i = 0; i < ip_set_max; i++) {
-			if (ip_set_list[i] != NULL)
+			s = nfnl_set(i);
+			if (s != NULL)
 				ip_set_destroy_set(i);
 		}
 	} else {
-		i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
-		if (i == IPSET_INVALID_ID) {
+		s = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &i);
+		if (s == NULL) {
 			ret = -ENOENT;
 			goto out;
-		} else if (ip_set_list[i]->ref) {
+		} else if (s->ref) {
 			ret = -IPSET_ERR_BUSY;
 			goto out;
 		}
@ -853,21 +924,24 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
 	     const struct nlmsghdr *nlh,
 	     const struct nlattr * const attr[])
 {
+	struct ip_set *s;
 	ip_set_id_t i;

 	if (unlikely(protocol_failed(attr)))
 		return -IPSET_ERR_PROTOCOL;

 	if (!attr[IPSET_ATTR_SETNAME]) {
-		for (i = 0; i < ip_set_max; i++)
-			if (ip_set_list[i] != NULL)
-				ip_set_flush_set(ip_set_list[i]);
+		for (i = 0; i < ip_set_max; i++) {
+			s = nfnl_set(i);
+			if (s != NULL)
+				ip_set_flush_set(s);
+		}
 	} else {
-		i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
-		if (i == IPSET_INVALID_ID)
+		s = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+		if (s == NULL)
 			return -ENOENT;

-		ip_set_flush_set(ip_set_list[i]);
+		ip_set_flush_set(s);
 	}

 	return 0;
@ -889,7 +963,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
 	      const struct nlmsghdr *nlh,
 	      const struct nlattr * const attr[])
 {
-	struct ip_set *set;
+	struct ip_set *set, *s;
 	const char *name2;
 	ip_set_id_t i;
 	int ret = 0;
@ -911,8 +985,8 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,

 	name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
 	for (i = 0; i < ip_set_max; i++) {
-		if (ip_set_list[i] != NULL &&
-		    STREQ(ip_set_list[i]->name, name2)) {
+		s = nfnl_set(i);
+		if (s != NULL && STREQ(s->name, name2)) {
 			ret = -IPSET_ERR_EXIST_SETNAME2;
 			goto out;
 		}
@ -947,17 +1021,14 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
 		     attr[IPSET_ATTR_SETNAME2] == NULL))
 		return -IPSET_ERR_PROTOCOL;

-	from_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
-	if (from_id == IPSET_INVALID_ID)
+	from = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &from_id);
+	if (from == NULL)
 		return -ENOENT;

-	to_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME2]));
-	if (to_id == IPSET_INVALID_ID)
+	to = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME2]), &to_id);
+	if (to == NULL)
 		return -IPSET_ERR_EXIST_SETNAME2;

-	from = ip_set_list[from_id];
-	to = ip_set_list[to_id];
-
 	/* Features must not change.
 	 * Not an artificial restriction anymore, as we must prevent
 	 * possible loops created by swapping in setlist type of sets. */
@ -971,8 +1042,8 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,

 	write_lock_bh(&ip_set_ref_lock);
 	swap(from->ref, to->ref);
-	ip_set_list[from_id] = to;
-	ip_set_list[to_id] = from;
+	nfnl_set(from_id) = to;
+	nfnl_set(to_id) = from;
 	write_unlock_bh(&ip_set_ref_lock);

 	return 0;
@ -992,7 +1063,7 @@ static int
 ip_set_dump_done(struct netlink_callback *cb)
 {
 	if (cb->args[2]) {
-		pr_debug("release set %s\n", ip_set_list[cb->args[1]]->name);
+		pr_debug("release set %s\n", nfnl_set(cb->args[1])->name);
 		ip_set_put_byindex((ip_set_id_t) cb->args[1]);
 	}
 	return 0;
@ -1030,8 +1101,11 @@ dump_init(struct netlink_callback *cb)
 	 */

 	if (cda[IPSET_ATTR_SETNAME]) {
-		index = find_set_id(nla_data(cda[IPSET_ATTR_SETNAME]));
-		if (index == IPSET_INVALID_ID)
+		struct ip_set *set;
+
+		set = find_set_and_id(nla_data(cda[IPSET_ATTR_SETNAME]),
+				      &index);
+		if (set == NULL)
 			return -ENOENT;

 		dump_type = DUMP_ONE;
@ -1081,7 +1155,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 		 dump_type, dump_flags, cb->args[1]);
 	for (; cb->args[1] < max; cb->args[1]++) {
 		index = (ip_set_id_t) cb->args[1];
-		set = ip_set_list[index];
+		set = nfnl_set(index);
 		if (set == NULL) {
 			if (dump_type == DUMP_ONE) {
 				ret = -ENOENT;
@ -1100,7 +1174,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 		if (!cb->args[2]) {
 			/* Start listing: make sure set won't be destroyed */
 			pr_debug("reference set\n");
-			__ip_set_get(index);
+			__ip_set_get(set);
 		}
 		nlh = start_msg(skb, NETLINK_CB(cb->skb).portid,
 				cb->nlh->nlmsg_seq, flags,
@ -1159,7 +1233,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 release_refcount:
 	/* If there was an error or set is done, release set */
 	if (ret || !cb->args[2]) {
-		pr_debug("release set %s\n", ip_set_list[index]->name);
+		pr_debug("release set %s\n", nfnl_set(index)->name);
 		ip_set_put_byindex(index);
 		cb->args[2] = 0;
 	}
@ -1409,17 +1483,15 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb,
 	const struct ip_set *set;
 	struct sk_buff *skb2;
 	struct nlmsghdr *nlh2;
-	ip_set_id_t index;
 	int ret = 0;

 	if (unlikely(protocol_failed(attr) ||
 		     attr[IPSET_ATTR_SETNAME] == NULL))
 		return -IPSET_ERR_PROTOCOL;

-	index = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
-	if (index == IPSET_INVALID_ID)
+	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+	if (set == NULL)
 		return -ENOENT;
-	set = ip_set_list[index];

 	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (skb2 == NULL)
@ -1684,6 +1756,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
 	}
 	case IP_SET_OP_GET_BYNAME: {
 		struct ip_set_req_get_set *req_get = data;
+		ip_set_id_t id;

 		if (*len != sizeof(struct ip_set_req_get_set)) {
 			ret = -EINVAL;
@ -1691,12 +1764,14 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
 		}
 		req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
 		nfnl_lock();
-		req_get->set.index = find_set_id(req_get->set.name);
+		find_set_and_id(req_get->set.name, &id);
+		req_get->set.index = id;
 		nfnl_unlock();
 		goto copy;
 	}
 	case IP_SET_OP_GET_BYINDEX: {
 		struct ip_set_req_get_set *req_get = data;
+		struct ip_set *set;

 		if (*len != sizeof(struct ip_set_req_get_set) ||
 		    req_get->set.index >= ip_set_max) {
@ -1704,9 +1779,8 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
 			goto done;
 		}
 		nfnl_lock();
-		strncpy(req_get->set.name,
-			ip_set_list[req_get->set.index]
-				? ip_set_list[req_get->set.index]->name : "",
+		set = nfnl_set(req_get->set.index);
+		strncpy(req_get->set.name, set ? set->name : "",
 			IPSET_MAXNAMELEN);
 		nfnl_unlock();
 		goto copy;
@ -1737,6 +1811,7 @@ static struct nf_sockopt_ops so_set __read_mostly = {
 static int __init
 ip_set_init(void)
 {
+	struct ip_set **list;
 	int ret;

 	if (max_sets)
@ -1744,22 +1819,22 @@ ip_set_init(void)
 	if (ip_set_max >= IPSET_INVALID_ID)
 		ip_set_max = IPSET_INVALID_ID - 1;

-	ip_set_list = kzalloc(sizeof(struct ip_set *) * ip_set_max,
-			      GFP_KERNEL);
-	if (!ip_set_list)
+	list = kzalloc(sizeof(struct ip_set *) * ip_set_max, GFP_KERNEL);
+	if (!list)
 		return -ENOMEM;

+	rcu_assign_pointer(ip_set_list, list);
 	ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
 	if (ret != 0) {
 		pr_err("ip_set: cannot register with nfnetlink.\n");
-		kfree(ip_set_list);
+		kfree(list);
 		return ret;
 	}
 	ret = nf_register_sockopt(&so_set);
 	if (ret != 0) {
 		pr_err("SO_SET registry failed: %d\n", ret);
 		nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
-		kfree(ip_set_list);
+		kfree(list);
 		return ret;
 	}

@ -1770,10 +1845,12 @@ ip_set_init(void)
 static void __exit
 ip_set_fini(void)
 {
+	struct ip_set **list = rcu_dereference_protected(ip_set_list, 1);
+
 	/* There can't be any existing set */
 	nf_unregister_sockopt(&so_set);
 	nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
-	kfree(ip_set_list);
+	kfree(list);
 	pr_debug("these are the famous last words\n");
 }

--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@ -221,11 +221,9 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	 * too. */
 	nf_ct_remove_expectations(ct);

-	/* We overload first tuple to link into unconfirmed list. */
-	if (!nf_ct_is_confirmed(ct)) {
-		BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
-		hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
-	}
+	/* We overload first tuple to link into unconfirmed or dying list.*/
+	BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
+	hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);

 	NF_CT_STAT_INC(net, delete);
 	spin_unlock_bh(&nf_conntrack_lock);
@ -247,6 +245,9 @@ void nf_ct_delete_from_lists(struct nf_conn *ct)
 	 * Otherwise we can get spurious warnings. */
 	NF_CT_STAT_INC(net, delete_list);
 	clean_from_lists(ct);
+	/* add this conntrack to the dying list */
+	hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+			     &net->ct.dying);
 	spin_unlock_bh(&nf_conntrack_lock);
 }
 EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists);
@ -268,31 +269,23 @@ static void death_by_event(unsigned long ul_conntrack)
 	}
 	/* we've got the event delivered, now it's dying */
 	set_bit(IPS_DYING_BIT, &ct->status);
-	spin_lock(&nf_conntrack_lock);
-	hlist_nulls_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
-	spin_unlock(&nf_conntrack_lock);
 	nf_ct_put(ct);
 }

-void nf_ct_insert_dying_list(struct nf_conn *ct)
+void nf_ct_dying_timeout(struct nf_conn *ct)
 {
 	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);

 	BUG_ON(ecache == NULL);

-	/* add this conntrack to the dying list */
-	spin_lock_bh(&nf_conntrack_lock);
-	hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
-			     &net->ct.dying);
-	spin_unlock_bh(&nf_conntrack_lock);
 	/* set a new timer to retry event delivery */
 	setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct);
 	ecache->timeout.expires = jiffies +
 		(random32() % net->ct.sysctl_events_retry_timeout);
 	add_timer(&ecache->timeout);
 }
-EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list);
+EXPORT_SYMBOL_GPL(nf_ct_dying_timeout);

 static void death_by_timeout(unsigned long ul_conntrack)
 {
@ -307,7 +300,7 @@ static void death_by_timeout(unsigned long ul_conntrack)
 	    unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
 		/* destroy event was not delivered */
 		nf_ct_delete_from_lists(ct);
-		nf_ct_insert_dying_list(ct);
+		nf_ct_dying_timeout(ct);
 		return;
 	}
 	set_bit(IPS_DYING_BIT, &ct->status);
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@ -898,7 +898,8 @@ ctnetlink_parse_zone(const struct nlattr *attr, u16 *zone)
 }

 static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = {
-	[CTA_HELP_NAME]		= { .type = NLA_NUL_STRING },
+	[CTA_HELP_NAME]		= { .type = NLA_NUL_STRING,
+				    .len = NF_CT_HELPER_NAME_LEN - 1 },
 };

 static inline int
@ -932,6 +933,8 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
 	[CTA_ID]		= { .type = NLA_U32 },
 	[CTA_NAT_DST]		= { .type = NLA_NESTED },
 	[CTA_TUPLE_MASTER]	= { .type = NLA_NESTED },
+	[CTA_NAT_SEQ_ADJ_ORIG]  = { .type = NLA_NESTED },
+	[CTA_NAT_SEQ_ADJ_REPLY] = { .type = NLA_NESTED },
 	[CTA_ZONE]		= { .type = NLA_U16 },
 	[CTA_MARK_MASK]		= { .type = NLA_U32 },
 };
@ -989,7 +992,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 					      nlmsg_report(nlh)) < 0) {
 			nf_ct_delete_from_lists(ct);
 			/* we failed to report the event, try later */
-			nf_ct_insert_dying_list(ct);
+			nf_ct_dying_timeout(ct);
 			nf_ct_put(ct);
 			return 0;
 		}
@ -1089,6 +1092,112 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	return err == -EAGAIN ? -ENOBUFS : err;
 }

+static int ctnetlink_done_list(struct netlink_callback *cb)
+{
+	if (cb->args[1])
+		nf_ct_put((struct nf_conn *)cb->args[1]);
+	return 0;
+}
+
+static int
+ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb,
+		    struct hlist_nulls_head *list)
+{
+	struct nf_conn *ct, *last;
+	struct nf_conntrack_tuple_hash *h;
+	struct hlist_nulls_node *n;
+	struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+	u_int8_t l3proto = nfmsg->nfgen_family;
+	int res;
+
+	if (cb->args[2])
+		return 0;
+
+	spin_lock_bh(&nf_conntrack_lock);
+	last = (struct nf_conn *)cb->args[1];
+restart:
+	hlist_nulls_for_each_entry(h, n, list, hnnode) {
+		ct = nf_ct_tuplehash_to_ctrack(h);
+		if (l3proto && nf_ct_l3num(ct) != l3proto)
+			continue;
+		if (cb->args[1]) {
+			if (ct != last)
+				continue;
+			cb->args[1] = 0;
+		}
+		rcu_read_lock();
+		res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
+					  cb->nlh->nlmsg_seq,
+					  NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
+					  ct);
+		rcu_read_unlock();
+		if (res < 0) {
+			nf_conntrack_get(&ct->ct_general);
+			cb->args[1] = (unsigned long)ct;
+			goto out;
+		}
+	}
+	if (cb->args[1]) {
+		cb->args[1] = 0;
+		goto restart;
+	} else
+		cb->args[2] = 1;
+out:
+	spin_unlock_bh(&nf_conntrack_lock);
+	if (last)
+		nf_ct_put(last);
+
+	return skb->len;
+}
+
+static int
+ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+
+	return ctnetlink_dump_list(skb, cb, &net->ct.dying);
+}
+
+static int
+ctnetlink_get_ct_dying(struct sock *ctnl, struct sk_buff *skb,
+		       const struct nlmsghdr *nlh,
+		       const struct nlattr * const cda[])
+{
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.dump = ctnetlink_dump_dying,
+			.done = ctnetlink_done_list,
+		};
+		return netlink_dump_start(ctnl, skb, nlh, &c);
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static int
+ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+
+	return ctnetlink_dump_list(skb, cb, &net->ct.unconfirmed);
+}
+
+static int
+ctnetlink_get_ct_unconfirmed(struct sock *ctnl, struct sk_buff *skb,
+			     const struct nlmsghdr *nlh,
+			     const struct nlattr * const cda[])
+{
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.dump = ctnetlink_dump_unconfirmed,
+			.done = ctnetlink_done_list,
+		};
+		return netlink_dump_start(ctnl, skb, nlh, &c);
+	}
+
+	return -EOPNOTSUPP;
+}
+
 #ifdef CONFIG_NF_NAT_NEEDED
 static int
 ctnetlink_parse_nat_setup(struct nf_conn *ct,
@ -2216,7 +2325,8 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
 	[CTA_EXPECT_MASK]	= { .type = NLA_NESTED },
 	[CTA_EXPECT_TIMEOUT]	= { .type = NLA_U32 },
 	[CTA_EXPECT_ID]		= { .type = NLA_U32 },
-	[CTA_EXPECT_HELP_NAME]	= { .type = NLA_NUL_STRING },
+	[CTA_EXPECT_HELP_NAME]	= { .type = NLA_NUL_STRING,
+				    .len = NF_CT_HELPER_NAME_LEN - 1 },
 	[CTA_EXPECT_ZONE]	= { .type = NLA_U16 },
 	[CTA_EXPECT_FLAGS]	= { .type = NLA_U32 },
 	[CTA_EXPECT_CLASS]	= { .type = NLA_U32 },
@ -2712,6 +2822,8 @@ static const struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
 					    .policy = ct_nla_policy },
 	[IPCTNL_MSG_CT_GET_STATS_CPU]	= { .call = ctnetlink_stat_ct_cpu },
 	[IPCTNL_MSG_CT_GET_STATS]	= { .call = ctnetlink_stat_ct },
+	[IPCTNL_MSG_CT_GET_DYING]	= { .call = ctnetlink_get_ct_dying },
+	[IPCTNL_MSG_CT_GET_UNCONFIRMED]	= { .call = ctnetlink_get_ct_unconfirmed },
 };

 static const struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = {
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@ -1353,6 +1353,8 @@ static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
 	[CTA_TIMEOUT_TCP_TIME_WAIT]	= { .type = NLA_U32 },
 	[CTA_TIMEOUT_TCP_CLOSE]		= { .type = NLA_U32 },
 	[CTA_TIMEOUT_TCP_SYN_SENT2]	= { .type = NLA_U32 },
+	[CTA_TIMEOUT_TCP_RETRANS]	= { .type = NLA_U32 },
+	[CTA_TIMEOUT_TCP_UNACK]		= { .type = NLA_U32 },
 };
 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */

--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@ -14,84 +14,32 @@
 #include "nf_internals.h"

 /*
- * A queue handler may be registered for each protocol.  Each is protected by
- * long term mutex.  The handler must provide an an outfn() to accept packets
- * for queueing and must reinject all packets it receives, no matter what.
+ * Hook for nfnetlink_queue to register its queue handler.
+ * We do this so that most of the NFQUEUE code can be modular.
+ *
+ * Once the queue is registered it must reinject all packets it
+ * receives, no matter what.
 */
-static const struct nf_queue_handler __rcu *queue_handler[NFPROTO_NUMPROTO] __read_mostly;
-
-static DEFINE_MUTEX(queue_handler_mutex);
+static const struct nf_queue_handler __rcu *queue_handler __read_mostly;

 /* return EBUSY when somebody else is registered, return EEXIST if the
 * same handler is registered, return 0 in case of success. */
-int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
+void nf_register_queue_handler(const struct nf_queue_handler *qh)
 {
-	int ret;
-	const struct nf_queue_handler *old;
-
-	if (pf >= ARRAY_SIZE(queue_handler))
-		return -EINVAL;
-
-	mutex_lock(&queue_handler_mutex);
-	old = rcu_dereference_protected(queue_handler[pf],
-					lockdep_is_held(&queue_handler_mutex));
-	if (old == qh)
-		ret = -EEXIST;
-	else if (old)
-		ret = -EBUSY;
-	else {
-		rcu_assign_pointer(queue_handler[pf], qh);
-		ret = 0;
-	}
-	mutex_unlock(&queue_handler_mutex);
-
-	return ret;
+	/* should never happen, we only have one queueing backend in kernel */
+	WARN_ON(rcu_access_pointer(queue_handler));
+	rcu_assign_pointer(queue_handler, qh);
 }
 EXPORT_SYMBOL(nf_register_queue_handler);

 /* The caller must flush their queue before this */
-int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
+void nf_unregister_queue_handler(void)
 {
-	const struct nf_queue_handler *old;
-
-	if (pf >= ARRAY_SIZE(queue_handler))
-		return -EINVAL;
-
-	mutex_lock(&queue_handler_mutex);
-	old = rcu_dereference_protected(queue_handler[pf],
-					lockdep_is_held(&queue_handler_mutex));
-	if (old && old != qh) {
-		mutex_unlock(&queue_handler_mutex);
-		return -EINVAL;
-	}
-
-	RCU_INIT_POINTER(queue_handler[pf], NULL);
-	mutex_unlock(&queue_handler_mutex);
-
+	RCU_INIT_POINTER(queue_handler, NULL);
 	synchronize_rcu();
-
-	return 0;
 }
 EXPORT_SYMBOL(nf_unregister_queue_handler);

-void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
-{
-	u_int8_t pf;
-
-	mutex_lock(&queue_handler_mutex);
-	for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++)  {
-		if (rcu_dereference_protected(
-				queue_handler[pf],
-				lockdep_is_held(&queue_handler_mutex)
-				) == qh)
-			RCU_INIT_POINTER(queue_handler[pf], NULL);
-	}
-	mutex_unlock(&queue_handler_mutex);
-
-	synchronize_rcu();
-}
-EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
-
 static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
 {
 	/* Release those devices we held, or Alexey will kill me. */
@ -137,7 +85,7 @@ static int __nf_queue(struct sk_buff *skb,
 	/* QUEUE == DROP if no one is waiting, to be safe. */
 	rcu_read_lock();

-	qh = rcu_dereference(queue_handler[pf]);
+	qh = rcu_dereference(queue_handler);
 	if (!qh) {
 		status = -ESRCH;
 		goto err_unlock;
@ -344,77 +292,3 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 	kfree(entry);
 }
 EXPORT_SYMBOL(nf_reinject);
-
-#ifdef CONFIG_PROC_FS
-static void *seq_start(struct seq_file *seq, loff_t *pos)
-{
-	if (*pos >= ARRAY_SIZE(queue_handler))
-		return NULL;
-
-	return pos;
-}
-
-static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
-{
-	(*pos)++;
-
-	if (*pos >= ARRAY_SIZE(queue_handler))
-		return NULL;
-
-	return pos;
-}
-
-static void seq_stop(struct seq_file *s, void *v)
-{
-
-}
-
-static int seq_show(struct seq_file *s, void *v)
-{
-	int ret;
-	loff_t *pos = v;
-	const struct nf_queue_handler *qh;
-
-	rcu_read_lock();
-	qh = rcu_dereference(queue_handler[*pos]);
-	if (!qh)
-		ret = seq_printf(s, "%2lld NONE\n", *pos);
-	else
-		ret = seq_printf(s, "%2lld %s\n", *pos, qh->name);
-	rcu_read_unlock();
-
-	return ret;
-}
-
-static const struct seq_operations nfqueue_seq_ops = {
-	.start	= seq_start,
-	.next	= seq_next,
-	.stop	= seq_stop,
-	.show	= seq_show,
-};
-
-static int nfqueue_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &nfqueue_seq_ops);
-}
-
-static const struct file_operations nfqueue_file_ops = {
-	.owner	 = THIS_MODULE,
-	.open	 = nfqueue_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = seq_release,
-};
-#endif /* PROC_FS */
-
-
-int __init netfilter_queue_init(void)
-{
-#ifdef CONFIG_PROC_FS
-	if (!proc_create("nf_queue", S_IRUGO,
-			 proc_net_netfilter, &nfqueue_file_ops))
-		return -1;
-#endif
-	return 0;
-}
-
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@ -809,7 +809,6 @@ static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
 };

 static const struct nf_queue_handler nfqh = {
-	.name 	= "nf_queue",
 	.outfn	= &nfqnl_enqueue_packet,
 };

@ -827,14 +826,10 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 	if (nfqa[NFQA_CFG_CMD]) {
 		cmd = nla_data(nfqa[NFQA_CFG_CMD]);

-		/* Commands without queue context - might sleep */
+		/* Obsolete commands without queue context */
 		switch (cmd->command) {
-		case NFQNL_CFG_CMD_PF_BIND:
-			return nf_register_queue_handler(ntohs(cmd->pf),
-							 &nfqh);
-		case NFQNL_CFG_CMD_PF_UNBIND:
-			return nf_unregister_queue_handler(ntohs(cmd->pf),
-							   &nfqh);
+		case NFQNL_CFG_CMD_PF_BIND: return 0;
+		case NFQNL_CFG_CMD_PF_UNBIND: return 0;
 		}
 	}

@ -1074,6 +1069,7 @@ static int __init nfnetlink_queue_init(void)
 #endif

 	register_netdevice_notifier(&nfqnl_dev_notifier);
+	nf_register_queue_handler(&nfqh);
 	return status;

 #ifdef CONFIG_PROC_FS
@ -1087,7 +1083,7 @@ static int __init nfnetlink_queue_init(void)

 static void __exit nfnetlink_queue_fini(void)
 {
-	nf_unregister_queue_handlers(&nfqh);
+	nf_unregister_queue_handler();
 	unregister_netdevice_notifier(&nfqnl_dev_notifier);
 #ifdef CONFIG_PROC_FS
 	remove_proc_entry("nfnetlink_queue", proc_net_netfilter);