kernel_optimize_test/net/core/rtnetlink.c

731 lines
17 KiB
C
Raw Normal View History

/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* Routing netlink socket interface: protocol independent part.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Fixes:
* Vitaly E. Lavrov RTA_OK arithmetics was wrong.
*/
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/capability.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/security.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/string.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/arp.h>
#include <net/route.h>
#include <net/udp.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
DECLARE_MUTEX(rtnl_sem);
void rtnl_lock(void)
{
rtnl_shlock();
}
int rtnl_lock_interruptible(void)
{
return down_interruptible(&rtnl_sem);
}
void rtnl_unlock(void)
{
rtnl_shunlock();
netdev_run_todo();
}
int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
{
memset(tb, 0, sizeof(struct rtattr*)*maxattr);
while (RTA_OK(rta, len)) {
unsigned flavor = rta->rta_type;
if (flavor && flavor <= maxattr)
tb[flavor-1] = rta;
rta = RTA_NEXT(rta, len);
}
return 0;
}
struct sock *rtnl;
struct rtnetlink_link * rtnetlink_links[NPROTO];
static const int rtm_min[RTM_NR_FAMILIES] =
{
[RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
[RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
[RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)),
[RTM_FAM(RTM_NEWNEIGH)] = NLMSG_LENGTH(sizeof(struct ndmsg)),
[RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct rtmsg)),
[RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)),
[RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)),
[RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)),
[RTM_FAM(RTM_NEWACTION)] = NLMSG_LENGTH(sizeof(struct tcamsg)),
[RTM_FAM(RTM_NEWPREFIX)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
[RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
[RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
[RTM_FAM(RTM_NEWNEIGHTBL)] = NLMSG_LENGTH(sizeof(struct ndtmsg)),
};
static const int rta_max[RTM_NR_FAMILIES] =
{
[RTM_FAM(RTM_NEWLINK)] = IFLA_MAX,
[RTM_FAM(RTM_NEWADDR)] = IFA_MAX,
[RTM_FAM(RTM_NEWROUTE)] = RTA_MAX,
[RTM_FAM(RTM_NEWNEIGH)] = NDA_MAX,
[RTM_FAM(RTM_NEWRULE)] = RTA_MAX,
[RTM_FAM(RTM_NEWQDISC)] = TCA_MAX,
[RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX,
[RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX,
[RTM_FAM(RTM_NEWACTION)] = TCAA_MAX,
[RTM_FAM(RTM_NEWNEIGHTBL)] = NDTA_MAX,
};
void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
{
struct rtattr *rta;
int size = RTA_LENGTH(attrlen);
rta = (struct rtattr*)skb_put(skb, RTA_ALIGN(size));
rta->rta_type = attrtype;
rta->rta_len = size;
memcpy(RTA_DATA(rta), data, attrlen);
memset(RTA_DATA(rta) + attrlen, 0, RTA_ALIGN(size) - size);
}
size_t rtattr_strlcpy(char *dest, const struct rtattr *rta, size_t size)
{
size_t ret = RTA_PAYLOAD(rta);
char *src = RTA_DATA(rta);
if (ret > 0 && src[ret - 1] == '\0')
ret--;
if (size > 0) {
size_t len = (ret >= size) ? size - 1 : ret;
memset(dest, 0, size);
memcpy(dest, src, len);
}
return ret;
}
int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
{
int err = 0;
NETLINK_CB(skb).dst_group = group;
if (echo)
atomic_inc(&skb->users);
netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
if (echo)
err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
return err;
}
int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
{
struct rtattr *mx = (struct rtattr*)skb->tail;
int i;
RTA_PUT(skb, RTA_METRICS, 0, NULL);
for (i=0; i<RTAX_MAX; i++) {
if (metrics[i])
RTA_PUT(skb, i+1, sizeof(u32), metrics+i);
}
mx->rta_len = skb->tail - (u8*)mx;
if (mx->rta_len == RTA_LENGTH(0))
skb_trim(skb, (u8*)mx - skb->data);
return 0;
rtattr_failure:
skb_trim(skb, (u8*)mx - skb->data);
return -1;
}
static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
int type, u32 pid, u32 seq, u32 change,
unsigned int flags)
{
struct ifinfomsg *r;
struct nlmsghdr *nlh;
unsigned char *b = skb->tail;
nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*r), flags);
r = NLMSG_DATA(nlh);
r->ifi_family = AF_UNSPEC;
r->__ifi_pad = 0;
r->ifi_type = dev->type;
r->ifi_index = dev->ifindex;
r->ifi_flags = dev_get_flags(dev);
r->ifi_change = change;
RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name);
if (1) {
u32 txqlen = dev->tx_queue_len;
RTA_PUT(skb, IFLA_TXQLEN, sizeof(txqlen), &txqlen);
}
if (1) {
u32 weight = dev->weight;
RTA_PUT(skb, IFLA_WEIGHT, sizeof(weight), &weight);
}
if (1) {
struct rtnl_link_ifmap map = {
.mem_start = dev->mem_start,
.mem_end = dev->mem_end,
.base_addr = dev->base_addr,
.irq = dev->irq,
.dma = dev->dma,
.port = dev->if_port,
};
RTA_PUT(skb, IFLA_MAP, sizeof(map), &map);
}
if (dev->addr_len) {
RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
RTA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
}
if (1) {
u32 mtu = dev->mtu;
RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu);
}
if (dev->ifindex != dev->iflink) {
u32 iflink = dev->iflink;
RTA_PUT(skb, IFLA_LINK, sizeof(iflink), &iflink);
}
if (dev->qdisc_sleeping)
RTA_PUT(skb, IFLA_QDISC,
strlen(dev->qdisc_sleeping->ops->id) + 1,
dev->qdisc_sleeping->ops->id);
if (dev->master) {
u32 master = dev->master->ifindex;
RTA_PUT(skb, IFLA_MASTER, sizeof(master), &master);
}
if (dev->get_stats) {
unsigned long *stats = (unsigned long*)dev->get_stats(dev);
if (stats) {
struct rtattr *a;
__u32 *s;
int i;
int n = sizeof(struct rtnl_link_stats)/4;
a = __RTA_PUT(skb, IFLA_STATS, n*4);
s = RTA_DATA(a);
for (i=0; i<n; i++)
s[i] = stats[i];
}
}
nlh->nlmsg_len = skb->tail - b;
return skb->len;
nlmsg_failure:
rtattr_failure:
skb_trim(skb, b - skb->data);
return -1;
}
static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
int idx;
int s_idx = cb->args[0];
struct net_device *dev;
read_lock(&dev_base_lock);
for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
if (idx < s_idx)
continue;
if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK,
NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, 0,
NLM_F_MULTI) <= 0)
break;
}
read_unlock(&dev_base_lock);
cb->args[0] = idx;
return skb->len;
}
static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
struct ifinfomsg *ifm = NLMSG_DATA(nlh);
struct rtattr **ida = arg;
struct net_device *dev;
int err, send_addr_notify = 0;
if (ifm->ifi_index >= 0)
dev = dev_get_by_index(ifm->ifi_index);
else if (ida[IFLA_IFNAME - 1]) {
char ifname[IFNAMSIZ];
if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1],
IFNAMSIZ) >= IFNAMSIZ)
return -EINVAL;
dev = dev_get_by_name(ifname);
} else
return -EINVAL;
if (!dev)
return -ENODEV;
err = -EINVAL;
if (ifm->ifi_flags)
dev_change_flags(dev, ifm->ifi_flags);
if (ida[IFLA_MAP - 1]) {
struct rtnl_link_ifmap *u_map;
struct ifmap k_map;
if (!dev->set_config) {
err = -EOPNOTSUPP;
goto out;
}
if (!netif_device_present(dev)) {
err = -ENODEV;
goto out;
}
if (ida[IFLA_MAP - 1]->rta_len != RTA_LENGTH(sizeof(*u_map)))
goto out;
u_map = RTA_DATA(ida[IFLA_MAP - 1]);
k_map.mem_start = (unsigned long) u_map->mem_start;
k_map.mem_end = (unsigned long) u_map->mem_end;
k_map.base_addr = (unsigned short) u_map->base_addr;
k_map.irq = (unsigned char) u_map->irq;
k_map.dma = (unsigned char) u_map->dma;
k_map.port = (unsigned char) u_map->port;
err = dev->set_config(dev, &k_map);
if (err)
goto out;
}
if (ida[IFLA_ADDRESS - 1]) {
if (!dev->set_mac_address) {
err = -EOPNOTSUPP;
goto out;
}
if (!netif_device_present(dev)) {
err = -ENODEV;
goto out;
}
if (ida[IFLA_ADDRESS - 1]->rta_len != RTA_LENGTH(dev->addr_len))
goto out;
err = dev->set_mac_address(dev, RTA_DATA(ida[IFLA_ADDRESS - 1]));
if (err)
goto out;
send_addr_notify = 1;
}
if (ida[IFLA_BROADCAST - 1]) {
if (ida[IFLA_BROADCAST - 1]->rta_len != RTA_LENGTH(dev->addr_len))
goto out;
memcpy(dev->broadcast, RTA_DATA(ida[IFLA_BROADCAST - 1]),
dev->addr_len);
send_addr_notify = 1;
}
if (ida[IFLA_MTU - 1]) {
if (ida[IFLA_MTU - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
goto out;
err = dev_set_mtu(dev, *((u32 *) RTA_DATA(ida[IFLA_MTU - 1])));
if (err)
goto out;
}
if (ida[IFLA_TXQLEN - 1]) {
if (ida[IFLA_TXQLEN - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
goto out;
dev->tx_queue_len = *((u32 *) RTA_DATA(ida[IFLA_TXQLEN - 1]));
}
if (ida[IFLA_WEIGHT - 1]) {
if (ida[IFLA_WEIGHT - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
goto out;
dev->weight = *((u32 *) RTA_DATA(ida[IFLA_WEIGHT - 1]));
}
if (ifm->ifi_index >= 0 && ida[IFLA_IFNAME - 1]) {
char ifname[IFNAMSIZ];
if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1],
IFNAMSIZ) >= IFNAMSIZ)
goto out;
err = dev_change_name(dev, ifname);
if (err)
goto out;
}
err = 0;
out:
if (send_addr_notify)
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
dev_put(dev);
return err;
}
static int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
{
int idx;
int s_idx = cb->family;
if (s_idx == 0)
s_idx = 1;
for (idx=1; idx<NPROTO; idx++) {
int type = cb->nlh->nlmsg_type-RTM_BASE;
if (idx < s_idx || idx == PF_PACKET)
continue;
if (rtnetlink_links[idx] == NULL ||
rtnetlink_links[idx][type].dumpit == NULL)
continue;
if (idx > s_idx)
memset(&cb->args[0], 0, sizeof(cb->args));
if (rtnetlink_links[idx][type].dumpit(skb, cb))
break;
}
cb->family = idx;
return skb->len;
}
void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
{
struct sk_buff *skb;
int size = NLMSG_SPACE(sizeof(struct ifinfomsg) +
sizeof(struct rtnl_link_ifmap) +
sizeof(struct rtnl_link_stats) + 128);
skb = alloc_skb(size, GFP_KERNEL);
if (!skb)
return;
if (rtnetlink_fill_ifinfo(skb, dev, type, current->pid, 0, change, 0) < 0) {
kfree_skb(skb);
return;
}
NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL);
}
static int rtnetlink_done(struct netlink_callback *cb)
{
return 0;
}
/* Protected by RTNL sempahore. */
static struct rtattr **rta_buf;
static int rtattr_max;
/* Process one rtnetlink message. */
static __inline__ int
rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
{
struct rtnetlink_link *link;
struct rtnetlink_link *link_tab;
int sz_idx, kind;
int min_len;
int family;
int type;
int err;
/* Only requests are handled by kernel now */
if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
return 0;
type = nlh->nlmsg_type;
/* A control message: ignore them */
if (type < RTM_BASE)
return 0;
/* Unknown message: reply with EINVAL */
if (type > RTM_MAX)
goto err_inval;
type -= RTM_BASE;
/* All the messages must have at least 1 byte length */
if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg)))
return 0;
family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
if (family >= NPROTO) {
*errp = -EAFNOSUPPORT;
return -1;
}
link_tab = rtnetlink_links[family];
if (link_tab == NULL)
link_tab = rtnetlink_links[PF_UNSPEC];
link = &link_tab[type];
sz_idx = type>>2;
kind = type&3;
if (kind != 2 && security_netlink_recv(skb)) {
*errp = -EPERM;
return -1;
}
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
u32 rlen;
if (link->dumpit == NULL)
link = &(rtnetlink_links[PF_UNSPEC][type]);
if (link->dumpit == NULL)
goto err_inval;
if ((*errp = netlink_dump_start(rtnl, skb, nlh,
link->dumpit,
rtnetlink_done)) != 0) {
return -1;
}
rlen = NLMSG_ALIGN(nlh->nlmsg_len);
if (rlen > skb->len)
rlen = skb->len;
skb_pull(skb, rlen);
return -1;
}
memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
min_len = rtm_min[sz_idx];
if (nlh->nlmsg_len < min_len)
goto err_inval;
if (nlh->nlmsg_len > min_len) {
int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
struct rtattr *attr = (void*)nlh + NLMSG_ALIGN(min_len);
while (RTA_OK(attr, attrlen)) {
unsigned flavor = attr->rta_type;
if (flavor) {
if (flavor > rta_max[sz_idx])
goto err_inval;
rta_buf[flavor-1] = attr;
}
attr = RTA_NEXT(attr, attrlen);
}
}
if (link->doit == NULL)
link = &(rtnetlink_links[PF_UNSPEC][type]);
if (link->doit == NULL)
goto err_inval;
err = link->doit(skb, nlh, (void *)&rta_buf[0]);
*errp = err;
return err;
err_inval:
*errp = -EINVAL;
return -1;
}
/*
* Process one packet of messages.
* Malformed skbs with wrong lengths of messages are discarded silently.
*/
static inline int rtnetlink_rcv_skb(struct sk_buff *skb)
{
int err;
struct nlmsghdr * nlh;
while (skb->len >= NLMSG_SPACE(0)) {
u32 rlen;
nlh = (struct nlmsghdr *)skb->data;
if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
return 0;
rlen = NLMSG_ALIGN(nlh->nlmsg_len);
if (rlen > skb->len)
rlen = skb->len;
if (rtnetlink_rcv_msg(skb, nlh, &err)) {
/* Not error, but we must interrupt processing here:
* Note, that in this case we do not pull message
* from skb, it will be processed later.
*/
if (err == 0)
return -1;
netlink_ack(skb, nlh, err);
} else if (nlh->nlmsg_flags&NLM_F_ACK)
netlink_ack(skb, nlh, 0);
skb_pull(skb, rlen);
}
return 0;
}
/*
* rtnetlink input queue processing routine:
[NETLINK]: Synchronous message processing. Let's recap the problem. The current asynchronous netlink kernel message processing is vulnerable to these attacks: 1) Hit and run: Attacker sends one or more messages and then exits before they're processed. This may confuse/disable the next netlink user that gets the netlink address of the attacker since it may receive the responses to the attacker's messages. Proposed solutions: a) Synchronous processing. b) Stream mode socket. c) Restrict/prohibit binding. 2) Starvation: Because various netlink rcv functions were written to not return until all messages have been processed on a socket, it is possible for these functions to execute for an arbitrarily long period of time. If this is successfully exploited it could also be used to hold rtnl forever. Proposed solutions: a) Synchronous processing. b) Stream mode socket. Firstly let's cross off solution c). It only solves the first problem and it has user-visible impacts. In particular, it'll break user space applications that expect to bind or communicate with specific netlink addresses (pid's). So we're left with a choice of synchronous processing versus SOCK_STREAM for netlink. For the moment I'm sticking with the synchronous approach as suggested by Alexey since it's simpler and I'd rather spend my time working on other things. However, it does have a number of deficiencies compared to the stream mode solution: 1) User-space to user-space netlink communication is still vulnerable. 2) Inefficient use of resources. This is especially true for rtnetlink since the lock is shared with other users such as networking drivers. The latter could hold the rtnl while communicating with hardware which causes the rtnetlink user to wait when it could be doing other things. 3) It is still possible to DoS all netlink users by flooding the kernel netlink receive queue. The attacker simply fills the receive socket with a single netlink message that fills up the entire queue. The attacker then continues to call sendmsg with the same message in a loop. Point 3) can be countered by retransmissions in user-space code, however it is pretty messy. In light of these problems (in particular, point 3), we should implement stream mode netlink at some point. In the mean time, here is a patch that implements synchronous processing. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-05-04 05:55:09 +08:00
* - process as much as there was in the queue upon entry.
* - feed skbs to rtnetlink_rcv_skb, until it refuse a message,
[NETLINK]: Synchronous message processing. Let's recap the problem. The current asynchronous netlink kernel message processing is vulnerable to these attacks: 1) Hit and run: Attacker sends one or more messages and then exits before they're processed. This may confuse/disable the next netlink user that gets the netlink address of the attacker since it may receive the responses to the attacker's messages. Proposed solutions: a) Synchronous processing. b) Stream mode socket. c) Restrict/prohibit binding. 2) Starvation: Because various netlink rcv functions were written to not return until all messages have been processed on a socket, it is possible for these functions to execute for an arbitrarily long period of time. If this is successfully exploited it could also be used to hold rtnl forever. Proposed solutions: a) Synchronous processing. b) Stream mode socket. Firstly let's cross off solution c). It only solves the first problem and it has user-visible impacts. In particular, it'll break user space applications that expect to bind or communicate with specific netlink addresses (pid's). So we're left with a choice of synchronous processing versus SOCK_STREAM for netlink. For the moment I'm sticking with the synchronous approach as suggested by Alexey since it's simpler and I'd rather spend my time working on other things. However, it does have a number of deficiencies compared to the stream mode solution: 1) User-space to user-space netlink communication is still vulnerable. 2) Inefficient use of resources. This is especially true for rtnetlink since the lock is shared with other users such as networking drivers. The latter could hold the rtnl while communicating with hardware which causes the rtnetlink user to wait when it could be doing other things. 3) It is still possible to DoS all netlink users by flooding the kernel netlink receive queue. The attacker simply fills the receive socket with a single netlink message that fills up the entire queue. The attacker then continues to call sendmsg with the same message in a loop. Point 3) can be countered by retransmissions in user-space code, however it is pretty messy. In light of these problems (in particular, point 3), we should implement stream mode netlink at some point. In the mean time, here is a patch that implements synchronous processing. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-05-04 05:55:09 +08:00
* that will occur, when a dump started.
*/
static void rtnetlink_rcv(struct sock *sk, int len)
{
[NETLINK]: Synchronous message processing. Let's recap the problem. The current asynchronous netlink kernel message processing is vulnerable to these attacks: 1) Hit and run: Attacker sends one or more messages and then exits before they're processed. This may confuse/disable the next netlink user that gets the netlink address of the attacker since it may receive the responses to the attacker's messages. Proposed solutions: a) Synchronous processing. b) Stream mode socket. c) Restrict/prohibit binding. 2) Starvation: Because various netlink rcv functions were written to not return until all messages have been processed on a socket, it is possible for these functions to execute for an arbitrarily long period of time. If this is successfully exploited it could also be used to hold rtnl forever. Proposed solutions: a) Synchronous processing. b) Stream mode socket. Firstly let's cross off solution c). It only solves the first problem and it has user-visible impacts. In particular, it'll break user space applications that expect to bind or communicate with specific netlink addresses (pid's). So we're left with a choice of synchronous processing versus SOCK_STREAM for netlink. For the moment I'm sticking with the synchronous approach as suggested by Alexey since it's simpler and I'd rather spend my time working on other things. However, it does have a number of deficiencies compared to the stream mode solution: 1) User-space to user-space netlink communication is still vulnerable. 2) Inefficient use of resources. This is especially true for rtnetlink since the lock is shared with other users such as networking drivers. The latter could hold the rtnl while communicating with hardware which causes the rtnetlink user to wait when it could be doing other things. 3) It is still possible to DoS all netlink users by flooding the kernel netlink receive queue. The attacker simply fills the receive socket with a single netlink message that fills up the entire queue. The attacker then continues to call sendmsg with the same message in a loop. Point 3) can be countered by retransmissions in user-space code, however it is pretty messy. In light of these problems (in particular, point 3), we should implement stream mode netlink at some point. In the mean time, here is a patch that implements synchronous processing. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-05-04 05:55:09 +08:00
unsigned int qlen = skb_queue_len(&sk->sk_receive_queue);
do {
struct sk_buff *skb;
[NETLINK]: Synchronous message processing. Let's recap the problem. The current asynchronous netlink kernel message processing is vulnerable to these attacks: 1) Hit and run: Attacker sends one or more messages and then exits before they're processed. This may confuse/disable the next netlink user that gets the netlink address of the attacker since it may receive the responses to the attacker's messages. Proposed solutions: a) Synchronous processing. b) Stream mode socket. c) Restrict/prohibit binding. 2) Starvation: Because various netlink rcv functions were written to not return until all messages have been processed on a socket, it is possible for these functions to execute for an arbitrarily long period of time. If this is successfully exploited it could also be used to hold rtnl forever. Proposed solutions: a) Synchronous processing. b) Stream mode socket. Firstly let's cross off solution c). It only solves the first problem and it has user-visible impacts. In particular, it'll break user space applications that expect to bind or communicate with specific netlink addresses (pid's). So we're left with a choice of synchronous processing versus SOCK_STREAM for netlink. For the moment I'm sticking with the synchronous approach as suggested by Alexey since it's simpler and I'd rather spend my time working on other things. However, it does have a number of deficiencies compared to the stream mode solution: 1) User-space to user-space netlink communication is still vulnerable. 2) Inefficient use of resources. This is especially true for rtnetlink since the lock is shared with other users such as networking drivers. The latter could hold the rtnl while communicating with hardware which causes the rtnetlink user to wait when it could be doing other things. 3) It is still possible to DoS all netlink users by flooding the kernel netlink receive queue. The attacker simply fills the receive socket with a single netlink message that fills up the entire queue. The attacker then continues to call sendmsg with the same message in a loop. Point 3) can be countered by retransmissions in user-space code, however it is pretty messy. In light of these problems (in particular, point 3), we should implement stream mode netlink at some point. In the mean time, here is a patch that implements synchronous processing. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-05-04 05:55:09 +08:00
rtnl_lock();
if (qlen > skb_queue_len(&sk->sk_receive_queue))
qlen = skb_queue_len(&sk->sk_receive_queue);
for (; qlen; qlen--) {
[NETLINK]: Synchronous message processing. Let's recap the problem. The current asynchronous netlink kernel message processing is vulnerable to these attacks: 1) Hit and run: Attacker sends one or more messages and then exits before they're processed. This may confuse/disable the next netlink user that gets the netlink address of the attacker since it may receive the responses to the attacker's messages. Proposed solutions: a) Synchronous processing. b) Stream mode socket. c) Restrict/prohibit binding. 2) Starvation: Because various netlink rcv functions were written to not return until all messages have been processed on a socket, it is possible for these functions to execute for an arbitrarily long period of time. If this is successfully exploited it could also be used to hold rtnl forever. Proposed solutions: a) Synchronous processing. b) Stream mode socket. Firstly let's cross off solution c). It only solves the first problem and it has user-visible impacts. In particular, it'll break user space applications that expect to bind or communicate with specific netlink addresses (pid's). So we're left with a choice of synchronous processing versus SOCK_STREAM for netlink. For the moment I'm sticking with the synchronous approach as suggested by Alexey since it's simpler and I'd rather spend my time working on other things. However, it does have a number of deficiencies compared to the stream mode solution: 1) User-space to user-space netlink communication is still vulnerable. 2) Inefficient use of resources. This is especially true for rtnetlink since the lock is shared with other users such as networking drivers. The latter could hold the rtnl while communicating with hardware which causes the rtnetlink user to wait when it could be doing other things. 3) It is still possible to DoS all netlink users by flooding the kernel netlink receive queue. The attacker simply fills the receive socket with a single netlink message that fills up the entire queue. The attacker then continues to call sendmsg with the same message in a loop. Point 3) can be countered by retransmissions in user-space code, however it is pretty messy. In light of these problems (in particular, point 3), we should implement stream mode netlink at some point. In the mean time, here is a patch that implements synchronous processing. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-05-04 05:55:09 +08:00
skb = skb_dequeue(&sk->sk_receive_queue);
if (rtnetlink_rcv_skb(skb)) {
if (skb->len)
skb_queue_head(&sk->sk_receive_queue,
skb);
else {
kfree_skb(skb);
qlen--;
}
break;
}
kfree_skb(skb);
}
up(&rtnl_sem);
netdev_run_todo();
[NETLINK]: Synchronous message processing. Let's recap the problem. The current asynchronous netlink kernel message processing is vulnerable to these attacks: 1) Hit and run: Attacker sends one or more messages and then exits before they're processed. This may confuse/disable the next netlink user that gets the netlink address of the attacker since it may receive the responses to the attacker's messages. Proposed solutions: a) Synchronous processing. b) Stream mode socket. c) Restrict/prohibit binding. 2) Starvation: Because various netlink rcv functions were written to not return until all messages have been processed on a socket, it is possible for these functions to execute for an arbitrarily long period of time. If this is successfully exploited it could also be used to hold rtnl forever. Proposed solutions: a) Synchronous processing. b) Stream mode socket. Firstly let's cross off solution c). It only solves the first problem and it has user-visible impacts. In particular, it'll break user space applications that expect to bind or communicate with specific netlink addresses (pid's). So we're left with a choice of synchronous processing versus SOCK_STREAM for netlink. For the moment I'm sticking with the synchronous approach as suggested by Alexey since it's simpler and I'd rather spend my time working on other things. However, it does have a number of deficiencies compared to the stream mode solution: 1) User-space to user-space netlink communication is still vulnerable. 2) Inefficient use of resources. This is especially true for rtnetlink since the lock is shared with other users such as networking drivers. The latter could hold the rtnl while communicating with hardware which causes the rtnetlink user to wait when it could be doing other things. 3) It is still possible to DoS all netlink users by flooding the kernel netlink receive queue. The attacker simply fills the receive socket with a single netlink message that fills up the entire queue. The attacker then continues to call sendmsg with the same message in a loop. Point 3) can be countered by retransmissions in user-space code, however it is pretty messy. In light of these problems (in particular, point 3), we should implement stream mode netlink at some point. In the mean time, here is a patch that implements synchronous processing. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-05-04 05:55:09 +08:00
} while (qlen);
}
static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
{
[RTM_GETLINK - RTM_BASE] = { .dumpit = rtnetlink_dump_ifinfo },
[RTM_SETLINK - RTM_BASE] = { .doit = do_setlink },
[RTM_GETADDR - RTM_BASE] = { .dumpit = rtnetlink_dump_all },
[RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnetlink_dump_all },
[RTM_NEWNEIGH - RTM_BASE] = { .doit = neigh_add },
[RTM_DELNEIGH - RTM_BASE] = { .doit = neigh_delete },
[RTM_GETNEIGH - RTM_BASE] = { .dumpit = neigh_dump_info },
[RTM_GETRULE - RTM_BASE] = { .dumpit = rtnetlink_dump_all },
[RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info },
[RTM_SETNEIGHTBL - RTM_BASE] = { .doit = neightbl_set },
};
static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
switch (event) {
case NETDEV_UNREGISTER:
rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
break;
case NETDEV_REGISTER:
rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
break;
case NETDEV_UP:
case NETDEV_DOWN:
rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
break;
case NETDEV_CHANGE:
case NETDEV_GOING_DOWN:
break;
default:
rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
break;
}
return NOTIFY_DONE;
}
static struct notifier_block rtnetlink_dev_notifier = {
.notifier_call = rtnetlink_event,
};
void __init rtnetlink_init(void)
{
int i;
rtattr_max = 0;
for (i = 0; i < ARRAY_SIZE(rta_max); i++)
if (rta_max[i] > rtattr_max)
rtattr_max = rta_max[i];
rta_buf = kmalloc(rtattr_max * sizeof(struct rtattr *), GFP_KERNEL);
if (!rta_buf)
panic("rtnetlink_init: cannot allocate rta_buf\n");
rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv,
THIS_MODULE);
if (rtnl == NULL)
panic("rtnetlink_init: cannot initialize rtnetlink\n");
netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
register_netdevice_notifier(&rtnetlink_dev_notifier);
rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table;
rtnetlink_links[PF_PACKET] = link_rtnetlink_table;
}
EXPORT_SYMBOL(__rta_fill);
EXPORT_SYMBOL(rtattr_strlcpy);
EXPORT_SYMBOL(rtattr_parse);
EXPORT_SYMBOL(rtnetlink_links);
EXPORT_SYMBOL(rtnetlink_put_metrics);
EXPORT_SYMBOL(rtnl);
EXPORT_SYMBOL(rtnl_lock);
EXPORT_SYMBOL(rtnl_lock_interruptible);
EXPORT_SYMBOL(rtnl_sem);
EXPORT_SYMBOL(rtnl_unlock);