6bb0fef489
When netlink mmap on receive side is the consumer of nf queue data,
it can happen that in some edge cases, we write skb shared info into
the user space mmap buffer:
Assume a possible rx ring frame size of only 4096, and the network skb,
which is being zero-copied into the netlink skb, contains page frags
with an overall skb->len larger than the linear part of the netlink
skb.
skb_zerocopy(), which is generic and thus not aware of the fact that
shared info cannot be accessed for such skbs, then tries to write and
fill frags, thus leaking kernel data/pointers and in some corner cases
possibly writing out of bounds of the mmap area (when filling the
last slot in the ring buffer this way).
I.e. the ring buffer slot is then of status NL_MMAP_STATUS_VALID, has
an advertised length larger than 4096, where the linear part is visible
at the slot beginning, and the leaked sizeof(struct skb_shared_info)
has been written to the beginning of the next slot (also corrupting
the struct nl_mmap_hdr slot header incl. status etc), since skb->end
points to skb->data + ring->frame_size - NL_MMAP_HDRLEN.
The fix adds an extra parameter to __netlink_alloc_skb() and lets it take
the actual needed linear room for the network skb + meta data into account. It's completely
irrelevant for non-mmaped netlink sockets, but in case mmap sockets
are used, it can be decided whether the available skb_tailroom() is
really large enough for the buffer, or whether it needs to internally
fallback to a normal alloc_skb().
From the nf queue side, the information whether the destination port is
an mmap RX ring is not really available without extra port-to-socket
lookup, thus it can only be determined in lower layers i.e. when
__netlink_alloc_skb() is called that checks internally for this. I
chose to add the extra ldiff parameter as mmap will then still work:
We have data_len and hlen in nfqnl_build_packet_message(), data_len
is the full length (capped at queue->copy_range) for skb_zerocopy()
and hlen some possible part of data_len that needs to be copied; the
rem_len variable indicates the needed remaining linear mmap space.
The only other workaround in nf queue internally would be after
allocation time by e.g. capping the data_len to the skb_tailroom()
iff we deal with an mmap skb, but that would 1) expose the fact that
we use a mmap skb to upper layers, and 2) trim the skb where we
otherwise could just have moved the full skb into the normal receive
queue.
After the patch, in my test case the ring slot doesn't fit and therefore
shows NL_MMAP_STATUS_COPY, where a full skb carries all the data and
thus needs to be picked up via recv().
Fixes: 3ab1f683bf ("nfnetlink: add support for memory mapped netlink")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
193 lines
5.8 KiB
C
193 lines
5.8 KiB
C
#ifndef __LINUX_NETLINK_H
|
|
#define __LINUX_NETLINK_H
|
|
|
|
|
|
#include <linux/capability.h>
|
|
#include <linux/skbuff.h>
|
|
#include <linux/export.h>
|
|
#include <net/scm.h>
|
|
#include <uapi/linux/netlink.h>
|
|
|
|
struct net;
|
|
|
|
static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb)
|
|
{
|
|
return (struct nlmsghdr *)skb->data;
|
|
}
|
|
|
|
/* Bit flags describing a netlink skb; each enumerator is a distinct bit. */
enum netlink_skb_flags {
	NETLINK_SKB_MMAPED	= 1 << 0,	/* Packet data is mmaped */
	NETLINK_SKB_TX		= 1 << 1,	/* Packet was sent by userspace */
	NETLINK_SKB_DELIVERED	= 1 << 2,	/* Packet was delivered */
	NETLINK_SKB_DST		= 1 << 3,	/* Dst set in sendto or sendmsg */
};
|
|
|
|
struct netlink_skb_parms {
|
|
struct scm_creds creds; /* Skb credentials */
|
|
__u32 portid;
|
|
__u32 dst_group;
|
|
__u32 flags;
|
|
struct sock *sk;
|
|
bool nsid_is_set;
|
|
int nsid;
|
|
};
|
|
|
|
/* View the skb's cb[] storage as a struct netlink_skb_parms lvalue. */
#define NETLINK_CB(skb)		(*(struct netlink_skb_parms *)&((skb)->cb))
/* Address of the creds member inside that control block. */
#define NETLINK_CREDS(skb)	(&NETLINK_CB(skb).creds)
|
|
|
|
|
|
/*
 * Paired grab/ungrab calls, defined outside this header.
 * NOTE(review): names suggest they take and release exclusive access to
 * the netlink table - confirm against the implementation.
 */
void netlink_table_grab(void);
void netlink_table_ungrab(void);
|
|
|
|
/* Single-bit configuration flags (presumably for netlink_kernel_cfg.flags - confirm). */
#define NL_CFG_F_NONROOT_RECV	(1 << 0)
#define NL_CFG_F_NONROOT_SEND	(1 << 1)
|
|
|
|
/*
 * Optional configuration parameters for a kernel-side netlink socket;
 * passed by pointer to netlink_kernel_create().
 */
struct netlink_kernel_cfg {
	unsigned int	groups;
	unsigned int	flags;		/* presumably NL_CFG_F_* bits - confirm */
	/* Callback taking an skb; presumably invoked for incoming messages - confirm. */
	void		(*input)(struct sk_buff *skb);
	struct mutex	*cb_mutex;
	/* Per-group callbacks; bind can report failure through its int return. */
	int		(*bind)(struct net *net, int group);
	void		(*unbind)(struct net *net, int group);
	/* Predicate over a (net, sock) pair. */
	bool		(*compare)(struct net *net, struct sock *sk);
};
|
|
|
|
extern struct sock *__netlink_kernel_create(struct net *net, int unit,
|
|
struct module *module,
|
|
struct netlink_kernel_cfg *cfg);
|
|
static inline struct sock *
|
|
netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg)
|
|
{
|
|
return __netlink_kernel_create(net, unit, THIS_MODULE, cfg);
|
|
}
|
|
|
|
/* Kernel netlink socket and group helpers; all defined outside this header. */
void netlink_kernel_release(struct sock *sk);
int __netlink_change_ngroups(struct sock *sk, unsigned int groups);
int netlink_change_ngroups(struct sock *sk, unsigned int groups);
void __netlink_clear_multicast_users(struct sock *sk, unsigned int group);
void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
int netlink_has_listeners(struct sock *sk, unsigned int group);
|
|
|
|
extern struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size,
|
|
unsigned int ldiff, u32 dst_portid,
|
|
gfp_t gfp_mask);
|
|
static inline struct sk_buff *
|
|
netlink_alloc_skb(struct sock *ssk, unsigned int size, u32 dst_portid,
|
|
gfp_t gfp_mask)
|
|
{
|
|
return __netlink_alloc_skb(ssk, size, 0, dst_portid, gfp_mask);
|
|
}
|
|
|
|
extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock);
|
|
extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid,
|
|
__u32 group, gfp_t allocation);
|
|
extern int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb,
|
|
__u32 portid, __u32 group, gfp_t allocation,
|
|
int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
|
|
void *filter_data);
|
|
extern int netlink_set_err(struct sock *ssk, __u32 portid, __u32 group, int code);
|
|
extern int netlink_register_notifier(struct notifier_block *nb);
|
|
extern int netlink_unregister_notifier(struct notifier_block *nb);
|
|
|
|
/*
 * Fine-grained unicast helpers, declared here and defined elsewhere.
 */
struct sock *netlink_getsockbyfilp(struct file *filp);
int netlink_attachskb(struct sock *sk, struct sk_buff *skb, long *timeo,
		      struct sock *ssk);
void netlink_detachskb(struct sock *sk, struct sk_buff *skb);
int netlink_sendskb(struct sock *sk, struct sk_buff *skb);
|
|
|
|
static inline struct sk_buff *
|
|
netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
|
|
{
|
|
struct sk_buff *nskb;
|
|
|
|
nskb = skb_clone(skb, gfp_mask);
|
|
if (!nskb)
|
|
return NULL;
|
|
|
|
/* This is a large skb, set destructor callback to release head */
|
|
if (is_vmalloc_addr(skb->head))
|
|
nskb->destructor = skb->destructor;
|
|
|
|
return nskb;
|
|
}
|
|
|
|
/*
 * An skb should fit in one page, which suits headerless malloc.  The size
 * is nevertheless capped at 8K so that userspace does not need enormous
 * recvmsg() buffers merely to avoid MSG_TRUNC when PAGE_SIZE is very large.
 */
#if PAGE_SIZE >= 8192UL
#define NLMSG_GOODSIZE	SKB_WITH_OVERHEAD(8192UL)
#else
#define NLMSG_GOODSIZE	SKB_WITH_OVERHEAD(PAGE_SIZE)
#endif

/* NLMSG_GOODSIZE minus the netlink message header length. */
#define NLMSG_DEFAULT_SIZE	(NLMSG_GOODSIZE - NLMSG_HDRLEN)
|
|
|
|
|
|
struct netlink_callback {
|
|
struct sk_buff *skb;
|
|
const struct nlmsghdr *nlh;
|
|
int (*dump)(struct sk_buff * skb,
|
|
struct netlink_callback *cb);
|
|
int (*done)(struct netlink_callback *cb);
|
|
void *data;
|
|
/* the module that dump function belong to */
|
|
struct module *module;
|
|
u16 family;
|
|
u16 min_dump_alloc;
|
|
unsigned int prev_seq, seq;
|
|
long args[6];
|
|
};
|
|
|
|
struct netlink_notify {
|
|
struct net *net;
|
|
u32 portid;
|
|
int protocol;
|
|
};
|
|
|
|
struct nlmsghdr *
|
|
__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags);
|
|
|
|
struct netlink_dump_control {
|
|
int (*dump)(struct sk_buff *skb, struct netlink_callback *);
|
|
int (*done)(struct netlink_callback *);
|
|
void *data;
|
|
struct module *module;
|
|
u16 min_dump_alloc;
|
|
};
|
|
|
|
extern int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
|
|
const struct nlmsghdr *nlh,
|
|
struct netlink_dump_control *control);
|
|
static inline int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
|
|
const struct nlmsghdr *nlh,
|
|
struct netlink_dump_control *control)
|
|
{
|
|
if (!control->module)
|
|
control->module = THIS_MODULE;
|
|
|
|
return __netlink_dump_start(ssk, skb, nlh, control);
|
|
}
|
|
|
|
struct netlink_tap {
|
|
struct net_device *dev;
|
|
struct module *module;
|
|
struct list_head list;
|
|
};
|
|
|
|
/* Add or remove a netlink tap; defined outside this header. */
int netlink_add_tap(struct netlink_tap *nt);
int netlink_remove_tap(struct netlink_tap *nt);
|
|
|
|
/*
 * Capability checks for netlink messages, defined outside this header.
 * The __ variant takes the skb parameters directly; the others take the
 * skb itself.
 */
bool __netlink_ns_capable(const struct netlink_skb_parms *nsp,
			  struct user_namespace *ns, int cap);
bool netlink_ns_capable(const struct sk_buff *skb, struct user_namespace *ns,
			int cap);
bool netlink_capable(const struct sk_buff *skb, int cap);
bool netlink_net_capable(const struct sk_buff *skb, int cap);
|
|
|
|
#endif /* __LINUX_NETLINK_H */
|