Merge branch 'enic-vxlan-offload'

Govindarajulu Varadarajan says:

====================
enic: add vxlan offload support

This series adds vxlan offload support for enic driver. The first
patch adds vxlan devcmd for configuring vxland offload parameters.
Second patch adds ndo_udp_tunnel_add/del and offload on rx path.
There are to modes in which fw supports vxlan offload.

mode 0: fcoe bit is set for encapsulated packet. fcoe_fc_crc_ok is set
if checksum of csum is ok. This bit is or of ip_csum_ok and
tcp_udp_csum_ok

mode 2: BIT(0) in rss_hash is set if it is encapsulated packet.
        BIT(1) is set if outer_ip_csum_ok/
        BIT(2) is set if outer_tcp_csum_ok

Some hw supports only mode 0, some support mode 0 and 2. Driver gets
the supported modes bitmap using get_supported_feature_ver devcmd
and selects the highest mode both driver and fw supports.

Third patch adds offload support on tx path by adding
enic_features_check().

v2: Order local variable declarations from longest to shortest line,
    on all three patches.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2017-02-09 17:24:30 -05:00
commit b66a8043d0
6 changed files with 364 additions and 15 deletions

View File

@ -135,6 +135,11 @@ struct enic_rfs_flw_tbl {
struct timer_list rfs_may_expire;
};
struct vxlan_offload {
u16 vxlan_udp_port_number;
u8 patch_level;
};
/* Per-instance private data structure */
struct enic {
struct net_device *netdev;
@ -175,6 +180,7 @@ struct enic {
/* receive queue cache line section */
____cacheline_aligned struct vnic_rq rq[ENIC_RQ_MAX];
unsigned int rq_count;
struct vxlan_offload vxlan;
u64 rq_truncated_pkts;
u64 rq_bad_fcs;
struct napi_struct napi[ENIC_RQ_MAX + ENIC_WQ_MAX];

View File

@ -45,6 +45,7 @@
#endif
#include <linux/crash_dump.h>
#include <net/busy_poll.h>
#include <net/vxlan.h>
#include "cq_enet_desc.h"
#include "vnic_dev.h"
@ -176,6 +177,134 @@ static void enic_unset_affinity_hint(struct enic *enic)
irq_set_affinity_hint(enic->msix_entry[i].vector, NULL);
}
static void enic_udp_tunnel_add(struct net_device *netdev,
struct udp_tunnel_info *ti)
{
struct enic *enic = netdev_priv(netdev);
__be16 port = ti->port;
int err;
spin_lock_bh(&enic->devcmd_lock);
if (ti->type != UDP_TUNNEL_TYPE_VXLAN) {
netdev_info(netdev, "udp_tnl: only vxlan tunnel offload supported");
goto error;
}
if (ti->sa_family != AF_INET) {
netdev_info(netdev, "vxlan: only IPv4 offload supported");
goto error;
}
if (enic->vxlan.vxlan_udp_port_number) {
if (ntohs(port) == enic->vxlan.vxlan_udp_port_number)
netdev_warn(netdev, "vxlan: udp port already offloaded");
else
netdev_info(netdev, "vxlan: offload supported for only one UDP port");
goto error;
}
err = vnic_dev_overlay_offload_cfg(enic->vdev,
OVERLAY_CFG_VXLAN_PORT_UPDATE,
ntohs(port));
if (err)
goto error;
err = vnic_dev_overlay_offload_ctrl(enic->vdev, OVERLAY_FEATURE_VXLAN,
enic->vxlan.patch_level);
if (err)
goto error;
enic->vxlan.vxlan_udp_port_number = ntohs(port);
netdev_info(netdev, "vxlan fw-vers-%d: offload enabled for udp port: %d, sa_family: %d ",
(int)enic->vxlan.patch_level, ntohs(port), ti->sa_family);
goto unlock;
error:
netdev_info(netdev, "failed to offload udp port: %d, sa_family: %d, type: %d",
ntohs(port), ti->sa_family, ti->type);
unlock:
spin_unlock_bh(&enic->devcmd_lock);
}
static void enic_udp_tunnel_del(struct net_device *netdev,
struct udp_tunnel_info *ti)
{
struct enic *enic = netdev_priv(netdev);
int err;
spin_lock_bh(&enic->devcmd_lock);
if ((ti->sa_family != AF_INET) ||
((ntohs(ti->port) != enic->vxlan.vxlan_udp_port_number)) ||
(ti->type != UDP_TUNNEL_TYPE_VXLAN)) {
netdev_info(netdev, "udp_tnl: port:%d, sa_family: %d, type: %d not offloaded",
ntohs(ti->port), ti->sa_family, ti->type);
goto unlock;
}
err = vnic_dev_overlay_offload_ctrl(enic->vdev, OVERLAY_FEATURE_VXLAN,
OVERLAY_OFFLOAD_DISABLE);
if (err) {
netdev_err(netdev, "vxlan: del offload udp port: %d failed",
ntohs(ti->port));
goto unlock;
}
enic->vxlan.vxlan_udp_port_number = 0;
netdev_info(netdev, "vxlan: del offload udp port %d, family %d\n",
ntohs(ti->port), ti->sa_family);
unlock:
spin_unlock_bh(&enic->devcmd_lock);
}
static netdev_features_t enic_features_check(struct sk_buff *skb,
struct net_device *dev,
netdev_features_t features)
{
const struct ethhdr *eth = (struct ethhdr *)skb_inner_mac_header(skb);
struct enic *enic = netdev_priv(dev);
struct udphdr *udph;
u16 port = 0;
u16 proto;
if (!skb->encapsulation)
return features;
features = vxlan_features_check(skb, features);
/* hardware only supports IPv4 vxlan tunnel */
if (vlan_get_protocol(skb) != htons(ETH_P_IP))
goto out;
/* hardware does not support offload of ipv6 inner pkt */
if (eth->h_proto != ntohs(ETH_P_IP))
goto out;
proto = ip_hdr(skb)->protocol;
if (proto == IPPROTO_UDP) {
udph = udp_hdr(skb);
port = be16_to_cpu(udph->dest);
}
/* HW supports offload of only one UDP port. Remove CSUM and GSO MASK
* for other UDP port tunnels
*/
if (port != enic->vxlan.vxlan_udp_port_number)
goto out;
return features;
out:
return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
}
int enic_is_dynamic(struct enic *enic)
{
return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_DYN;
@ -504,20 +633,19 @@ static int enic_queue_wq_skb_csum_l4(struct enic *enic, struct vnic_wq *wq,
return err;
}
static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq,
struct sk_buff *skb, unsigned int mss,
int vlan_tag_insert, unsigned int vlan_tag,
int loopback)
static void enic_preload_tcp_csum_encap(struct sk_buff *skb)
{
unsigned int frag_len_left = skb_headlen(skb);
unsigned int len_left = skb->len - frag_len_left;
unsigned int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
int eop = (len_left == 0);
unsigned int len;
dma_addr_t dma_addr;
unsigned int offset = 0;
skb_frag_t *frag;
if (skb->protocol == cpu_to_be16(ETH_P_IP)) {
inner_ip_hdr(skb)->check = 0;
inner_tcp_hdr(skb)->check =
~csum_tcpudp_magic(inner_ip_hdr(skb)->saddr,
inner_ip_hdr(skb)->daddr, 0,
IPPROTO_TCP, 0);
}
}
static void enic_preload_tcp_csum(struct sk_buff *skb)
{
/* Preload TCP csum field with IP pseudo hdr calculated
* with IP length set to zero. HW will later add in length
* to each TCP segment resulting from the TSO.
@ -531,6 +659,30 @@ static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq,
tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
}
}
static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq,
struct sk_buff *skb, unsigned int mss,
int vlan_tag_insert, unsigned int vlan_tag,
int loopback)
{
unsigned int frag_len_left = skb_headlen(skb);
unsigned int len_left = skb->len - frag_len_left;
int eop = (len_left == 0);
unsigned int offset = 0;
unsigned int hdr_len;
dma_addr_t dma_addr;
unsigned int len;
skb_frag_t *frag;
if (skb->encapsulation) {
hdr_len = skb_inner_transport_header(skb) - skb->data;
hdr_len += inner_tcp_hdrlen(skb);
enic_preload_tcp_csum_encap(skb);
} else {
hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
enic_preload_tcp_csum(skb);
}
/* Queue WQ_ENET_MAX_DESC_LEN length descriptors
* for the main skb fragment
@ -579,6 +731,38 @@ static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq,
return 0;
}
static inline int enic_queue_wq_skb_encap(struct enic *enic, struct vnic_wq *wq,
struct sk_buff *skb,
int vlan_tag_insert,
unsigned int vlan_tag, int loopback)
{
unsigned int head_len = skb_headlen(skb);
unsigned int len_left = skb->len - head_len;
/* Hardware will overwrite the checksum fields, calculating from
* scratch and ignoring the value placed by software.
* Offload mode = 00
* mss[2], mss[1], mss[0] bits are set
*/
unsigned int mss_or_csum = 7;
int eop = (len_left == 0);
dma_addr_t dma_addr;
int err = 0;
dma_addr = pci_map_single(enic->pdev, skb->data, head_len,
PCI_DMA_TODEVICE);
if (unlikely(enic_dma_map_check(enic, dma_addr)))
return -ENOMEM;
enic_queue_wq_desc_ex(wq, skb, dma_addr, head_len, mss_or_csum, 0,
vlan_tag_insert, vlan_tag,
WQ_ENET_OFFLOAD_MODE_CSUM, eop, 1 /* SOP */, eop,
loopback);
if (!eop)
err = enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback);
return err;
}
static inline void enic_queue_wq_skb(struct enic *enic,
struct vnic_wq *wq, struct sk_buff *skb)
{
@ -601,6 +785,9 @@ static inline void enic_queue_wq_skb(struct enic *enic,
err = enic_queue_wq_skb_tso(enic, wq, skb, mss,
vlan_tag_insert, vlan_tag,
loopback);
else if (skb->encapsulation)
err = enic_queue_wq_skb_encap(enic, wq, skb, vlan_tag_insert,
vlan_tag, loopback);
else if (skb->ip_summed == CHECKSUM_PARTIAL)
err = enic_queue_wq_skb_csum_l4(enic, wq, skb, vlan_tag_insert,
vlan_tag, loopback);
@ -1113,6 +1300,7 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq,
u8 packet_error;
u16 q_number, completed_index, bytes_written, vlan_tci, checksum;
u32 rss_hash;
bool outer_csum_ok = true, encap = false;
if (skipped)
return;
@ -1161,7 +1349,8 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq,
skb_put(skb, bytes_written);
skb->protocol = eth_type_trans(skb, netdev);
skb_record_rx_queue(skb, q_number);
if (netdev->features & NETIF_F_RXHASH) {
if ((netdev->features & NETIF_F_RXHASH) && rss_hash &&
(type == 3)) {
switch (rss_type) {
case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv4:
case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6:
@ -1175,15 +1364,39 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq,
break;
}
}
if (enic->vxlan.vxlan_udp_port_number) {
switch (enic->vxlan.patch_level) {
case 0:
if (fcoe) {
encap = true;
outer_csum_ok = fcoe_fc_crc_ok;
}
break;
case 2:
if ((type == 7) &&
(rss_hash & BIT(0))) {
encap = true;
outer_csum_ok = (rss_hash & BIT(1)) &&
(rss_hash & BIT(2));
}
break;
}
}
/* Hardware does not provide whole packet checksum. It only
* provides pseudo checksum. Since hw validates the packet
* checksum but not provide us the checksum value. use
* CHECSUM_UNNECESSARY.
*
* In case of encap pkt tcp_udp_csum_ok/tcp_udp_csum_ok is
* inner csum_ok. outer_csum_ok is set by hw when outer udp
* csum is correct or is zero.
*/
if ((netdev->features & NETIF_F_RXCSUM) && tcp_udp_csum_ok &&
ipv4_csum_ok)
if ((netdev->features & NETIF_F_RXCSUM) && !csum_not_calc &&
tcp_udp_csum_ok && ipv4_csum_ok && outer_csum_ok) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->csum_level = encap;
}
if (vlan_stripped)
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);
@ -2285,6 +2498,9 @@ static const struct net_device_ops enic_netdev_dynamic_ops = {
#ifdef CONFIG_RFS_ACCEL
.ndo_rx_flow_steer = enic_rx_flow_steer,
#endif
.ndo_udp_tunnel_add = enic_udp_tunnel_add,
.ndo_udp_tunnel_del = enic_udp_tunnel_del,
.ndo_features_check = enic_features_check,
};
static const struct net_device_ops enic_netdev_ops = {
@ -2308,6 +2524,9 @@ static const struct net_device_ops enic_netdev_ops = {
#ifdef CONFIG_RFS_ACCEL
.ndo_rx_flow_steer = enic_rx_flow_steer,
#endif
.ndo_udp_tunnel_add = enic_udp_tunnel_add,
.ndo_udp_tunnel_del = enic_udp_tunnel_del,
.ndo_features_check = enic_features_check,
};
static void enic_dev_deinit(struct enic *enic)
@ -2683,6 +2902,39 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
netdev->hw_features |= NETIF_F_RXHASH;
if (ENIC_SETTING(enic, RXCSUM))
netdev->hw_features |= NETIF_F_RXCSUM;
if (ENIC_SETTING(enic, VXLAN)) {
u64 patch_level;
netdev->hw_enc_features |= NETIF_F_RXCSUM |
NETIF_F_TSO |
NETIF_F_TSO_ECN |
NETIF_F_GSO_UDP_TUNNEL |
NETIF_F_HW_CSUM |
NETIF_F_GSO_UDP_TUNNEL_CSUM;
netdev->hw_features |= netdev->hw_enc_features;
/* get bit mask from hw about supported offload bit level
* BIT(0) = fw supports patch_level 0
* fcoe bit = encap
* fcoe_fc_crc_ok = outer csum ok
* BIT(1) = always set by fw
* BIT(2) = fw supports patch_level 2
* BIT(0) in rss_hash = encap
* BIT(1,2) in rss_hash = outer_ip_csum_ok/
* outer_tcp_csum_ok
* used in enic_rq_indicate_buf
*/
err = vnic_dev_get_supported_feature_ver(enic->vdev,
VIC_FEATURE_VXLAN,
&patch_level);
if (err)
patch_level = 0;
/* mask bits that are supported by driver
*/
patch_level &= BIT_ULL(0) | BIT_ULL(2);
patch_level = fls(patch_level);
patch_level = patch_level ? patch_level - 1 : 0;
enic->vxlan.patch_level = patch_level;
}
netdev->features |= netdev->hw_features;
netdev->vlan_features |= netdev->features;

View File

@ -1247,3 +1247,37 @@ int vnic_dev_classifier(struct vnic_dev *vdev, u8 cmd, u16 *entry,
return ret;
}
int vnic_dev_overlay_offload_ctrl(struct vnic_dev *vdev, u8 overlay, u8 config)
{
u64 a0 = overlay;
u64 a1 = config;
int wait = 1000;
return vnic_dev_cmd(vdev, CMD_OVERLAY_OFFLOAD_CTRL, &a0, &a1, wait);
}
int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, u8 overlay,
u16 vxlan_udp_port_number)
{
u64 a1 = vxlan_udp_port_number;
u64 a0 = overlay;
int wait = 1000;
return vnic_dev_cmd(vdev, CMD_OVERLAY_OFFLOAD_CFG, &a0, &a1, wait);
}
int vnic_dev_get_supported_feature_ver(struct vnic_dev *vdev, u8 feature,
u64 *supported_versions)
{
u64 a0 = feature;
int wait = 1000;
u64 a1 = 0;
int ret;
ret = vnic_dev_cmd(vdev, CMD_GET_SUPP_FEATURE_VER, &a0, &a1, wait);
if (!ret)
*supported_versions = a0;
return ret;
}

View File

@ -179,5 +179,10 @@ int vnic_dev_set_mac_addr(struct vnic_dev *vdev, u8 *mac_addr);
int vnic_dev_classifier(struct vnic_dev *vdev, u8 cmd, u16 *entry,
struct filter *data);
int vnic_devcmd_init(struct vnic_dev *vdev);
int vnic_dev_overlay_offload_ctrl(struct vnic_dev *vdev, u8 overlay, u8 config);
int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, u8 overlay,
u16 vxlan_udp_port_number);
int vnic_dev_get_supported_feature_ver(struct vnic_dev *vdev, u8 feature,
u64 *supported_versions);
#endif /* _VNIC_DEV_H_ */

View File

@ -406,6 +406,31 @@ enum vnic_devcmd_cmd {
* in: (u32) a0=Queue Pair number
*/
CMD_QP_STATS_CLEAR = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 63),
/* Use this devcmd for agreeing on the highest common version supported
* by both driver and fw for features who need such a facility.
* in: (u64) a0 = feature (driver requests for the supported versions
* on this feature)
* out: (u64) a0 = bitmap of all supported versions for that feature
*/
CMD_GET_SUPP_FEATURE_VER = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ENET, 69),
/* Control (Enable/Disable) overlay offloads on the given vnic
* in: (u8) a0 = OVERLAY_FEATURE_NVGRE : NVGRE
* a0 = OVERLAY_FEATURE_VXLAN : VxLAN
* in: (u8) a1 = OVERLAY_OFFLOAD_ENABLE : Enable or
* a1 = OVERLAY_OFFLOAD_DISABLE : Disable or
* a1 = OVERLAY_OFFLOAD_ENABLE_V2 : Enable with version 2
*/
CMD_OVERLAY_OFFLOAD_CTRL = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 72),
/* Configuration of overlay offloads feature on a given vNIC
* in: (u8) a0 = DEVCMD_OVERLAY_NVGRE : NVGRE
* a0 = DEVCMD_OVERLAY_VXLAN : VxLAN
* in: (u8) a1 = VXLAN_PORT_UPDATE : VxLAN
* in: (u16) a2 = unsigned short int port information
*/
CMD_OVERLAY_OFFLOAD_CFG = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 73),
};
/* CMD_ENABLE2 flags */
@ -657,4 +682,30 @@ struct devcmd2_result {
#define DEVCMD2_RING_SIZE 32
#define DEVCMD2_DESC_SIZE 128
enum overlay_feature_t {
OVERLAY_FEATURE_NVGRE = 1,
OVERLAY_FEATURE_VXLAN,
OVERLAY_FEATURE_MAX,
};
enum overlay_ofld_cmd {
OVERLAY_OFFLOAD_ENABLE,
OVERLAY_OFFLOAD_DISABLE,
OVERLAY_OFFLOAD_ENABLE_P2,
OVERLAY_OFFLOAD_MAX,
};
#define OVERLAY_CFG_VXLAN_PORT_UPDATE 0
/* Use this enum to get the supported versions for each of these features
* If you need to use the devcmd_get_supported_feature_version(), add
* the new feature into this enum and install function handler in devcmd.c
*/
enum vic_feature_t {
VIC_FEATURE_VXLAN,
VIC_FEATURE_RDMA,
VIC_FEATURE_VXLAN_PATCH,
VIC_FEATURE_MAX,
};
#endif /* _VNIC_DEVCMD_H_ */

View File

@ -48,6 +48,7 @@ struct vnic_enet_config {
#define VENETF_RSSHASH_IPV6_EX 0x200 /* Hash on IPv6 extended fields */
#define VENETF_RSSHASH_TCPIPV6_EX 0x400 /* Hash on TCP + IPv6 ext. fields */
#define VENETF_LOOP 0x800 /* Loopback enabled */
#define VENETF_VXLAN 0x10000 /* VxLAN offload */
#define VENET_INTR_TYPE_MIN 0 /* Timer specs min interrupt spacing */
#define VENET_INTR_TYPE_IDLE 1 /* Timer specs idle time before irq */