Merge branch 'mlxsw-Offload-TC-action-pedit-munge-dsfield'

Ido Schimmel says:

====================
mlxsw: Offload TC action pedit munge dsfield

Petr says:

The Spectrum switches allow packet prioritization based on DSCP on ingress,
and update of DSCP on egress. This is configured through the DCB APP rules.
For some use cases, assigning a custom DSCP value based on an ACL match is
a better tool. To that end, offload FLOW_ACTION_MANGLE to permit changing
of dsfield as a whole, or DSCP and ECN values in isolation.

After fixing a commentary nit in patch #1, and mlxsw naming in patch #2,
patches #3 and #4 add the offload to mlxsw.

Patch #5 adds a forwarding selftest for pedit dsfield, applicable to SW as
well as HW datapaths. Patch #6 adds a mlxsw-specific test to verify DSCP
rewrite due to DCB APP rules is not performed on pedited packets.

The tests only cover IPv4 dsfield setting. We have tests for IPv6 as well,
but would like to postpone their contribution until the corresponding
iproute patches have been accepted.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2020-03-26 11:55:41 -07:00
commit 92b7e62e56
8 changed files with 515 additions and 8 deletions

View File

@ -1248,15 +1248,51 @@ EXPORT_SYMBOL(mlxsw_afa_block_append_mirror);
#define MLXSW_AFA_QOS_CODE 0x06
#define MLXSW_AFA_QOS_SIZE 1
enum mlxsw_afa_qos_cmd {
enum mlxsw_afa_qos_ecn_cmd {
/* Do nothing */
MLXSW_AFA_QOS_CMD_NOP,
/* Set a field */
MLXSW_AFA_QOS_CMD_SET,
MLXSW_AFA_QOS_ECN_CMD_NOP,
/* Set ECN to afa_qos_ecn */
MLXSW_AFA_QOS_ECN_CMD_SET,
};
/* afa_qos_ecn_cmd
 * ECN command as per mlxsw_afa_qos_ecn_cmd.
 */
MLXSW_ITEM32(afa, qos, ecn_cmd, 0x04, 29, 3);
/* afa_qos_ecn
* ECN value.
*/
MLXSW_ITEM32(afa, qos, ecn, 0x04, 24, 2);
/* Commands accepted by the afa_qos_dscp_cmd field; they select which part of
 * the DSCP is overwritten with the afa_qos_dscp value.
 */
enum mlxsw_afa_qos_dscp_cmd {
	/* Leave DSCP untouched. */
	MLXSW_AFA_QOS_DSCP_CMD_NOP = 0,
	/* Overwrite only the 3 least significant DSCP bits from dscp[2:0]. */
	MLXSW_AFA_QOS_DSCP_CMD_SET_3LSB = 1,
	/* Overwrite only the 3 most significant DSCP bits from dscp[5:3]. */
	MLXSW_AFA_QOS_DSCP_CMD_SET_3MSB = 2,
	/* Overwrite all 6 DSCP bits from dscp[5:0]. */
	MLXSW_AFA_QOS_DSCP_CMD_SET_ALL = 3,
};
/* afa_qos_dscp_cmd
* DSCP command.
*/
MLXSW_ITEM32(afa, qos, dscp_cmd, 0x04, 14, 2);
/* afa_qos_dscp
* DSCP value.
*/
MLXSW_ITEM32(afa, qos, dscp, 0x04, 0, 6);
/* Commands accepted by the afa_qos_switch_prio_cmd field. */
enum mlxsw_afa_qos_switch_prio_cmd {
	/* Leave the Switch Priority untouched. */
	MLXSW_AFA_QOS_SWITCH_PRIO_CMD_NOP = 0,
	/* Overwrite the Switch Priority with afa_qos_switch_prio. */
	MLXSW_AFA_QOS_SWITCH_PRIO_CMD_SET = 1,
};
/* afa_qos_switch_prio_cmd
 * Switch Priority command as per mlxsw_afa_qos_switch_prio_cmd.
 */
MLXSW_ITEM32(afa, qos, switch_prio_cmd, 0x08, 14, 2);
@ -1265,14 +1301,98 @@ MLXSW_ITEM32(afa, qos, switch_prio_cmd, 0x08, 14, 2);
*/
MLXSW_ITEM32(afa, qos, switch_prio, 0x08, 0, 4);
/* Values of the afa_qos_dscp_rw field, which controls the DSCP rewrite enable.
 * NOTE(review): per-value meaning below is taken from the names only; confirm
 * against the device documentation.
 */
enum mlxsw_afa_qos_dscp_rw {
	/* Presumably: keep the current rewrite-enable state. */
	MLXSW_AFA_QOS_DSCP_RW_PRESERVE = 0,
	/* Presumably: turn DSCP rewrite on. */
	MLXSW_AFA_QOS_DSCP_RW_SET = 1,
	/* Presumably: turn DSCP rewrite off. */
	MLXSW_AFA_QOS_DSCP_RW_CLEAR = 2,
};
/* afa_qos_dscp_rw
* DSCP Re-write Enable. Controlling the rewrite_enable for DSCP.
*/
MLXSW_ITEM32(afa, qos, dscp_rw, 0x0C, 30, 2);
/* Store the ECN command and the ECN value in a QoS action payload.
 *
 * @payload: QoS action payload to fill in.
 * @ecn_cmd: what to do with the ECN field.
 * @ecn: ECN value written to the afa_qos_ecn field.
 */
static inline void
mlxsw_afa_qos_ecn_pack(char *payload,
		       enum mlxsw_afa_qos_ecn_cmd ecn_cmd, u8 ecn)
{
	mlxsw_afa_qos_ecn_cmd_set(payload, ecn_cmd);
	mlxsw_afa_qos_ecn_set(payload, ecn);
}
/* Store the DSCP command and the DSCP value in a QoS action payload.
 *
 * @payload: QoS action payload to fill in.
 * @dscp_cmd: which part of the DSCP (if any) is to be overwritten.
 * @dscp: DSCP value written to the afa_qos_dscp field.
 */
static inline void
mlxsw_afa_qos_dscp_pack(char *payload,
			enum mlxsw_afa_qos_dscp_cmd dscp_cmd, u8 dscp)
{
	mlxsw_afa_qos_dscp_cmd_set(payload, dscp_cmd);
	mlxsw_afa_qos_dscp_set(payload, dscp);
}
static inline void
mlxsw_afa_qos_switch_prio_pack(char *payload,
enum mlxsw_afa_qos_cmd prio_cmd, u8 prio)
enum mlxsw_afa_qos_switch_prio_cmd prio_cmd,
u8 prio)
{
mlxsw_afa_qos_switch_prio_cmd_set(payload, prio_cmd);
mlxsw_afa_qos_switch_prio_set(payload, prio);
}
/* Append one QoS action to @block that sets DSCP and/or ECN.
 *
 * @block: action block the QoS action is appended to.
 * @set_dscp: when true, all six DSCP bits are set to @dscp and the DSCP
 *	      rewrite control is programmed to MLXSW_AFA_QOS_DSCP_RW_CLEAR.
 * @dscp: DSCP value; ignored unless @set_dscp.
 * @set_ecn: when true, ECN is set to @ecn.
 * @ecn: ECN value; ignored unless @set_ecn.
 * @extack: receives an error message if the action cannot be appended.
 *
 * Return: 0 on success, or the error encoded in the pointer returned by
 * mlxsw_afa_block_append_action().
 */
static int __mlxsw_afa_block_append_qos_dsfield(struct mlxsw_afa_block *block,
						bool set_dscp, u8 dscp,
						bool set_ecn, u8 ecn,
						struct netlink_ext_ack *extack)
{
	char *payload;

	payload = mlxsw_afa_block_append_action(block, MLXSW_AFA_QOS_CODE,
						MLXSW_AFA_QOS_SIZE);
	if (IS_ERR(payload)) {
		NL_SET_ERR_MSG_MOD(extack, "Cannot append QOS action");
		return PTR_ERR(payload);
	}

	if (set_ecn)
		mlxsw_afa_qos_ecn_pack(payload, MLXSW_AFA_QOS_ECN_CMD_SET,
				       ecn);

	if (set_dscp) {
		mlxsw_afa_qos_dscp_pack(payload,
					MLXSW_AFA_QOS_DSCP_CMD_SET_ALL, dscp);
		mlxsw_afa_qos_dscp_rw_set(payload,
					  MLXSW_AFA_QOS_DSCP_RW_CLEAR);
	}

	return 0;
}
/* Append a QoS action that rewrites the whole DS field.
 *
 * @dsfield is split into its two parts: the upper six bits are the DSCP and
 * the lower two bits are the ECN; both are set.
 *
 * Return: 0 on success, negative errno otherwise.
 */
int mlxsw_afa_block_append_qos_dsfield(struct mlxsw_afa_block *block,
				       u8 dsfield,
				       struct netlink_ext_ack *extack)
{
	u8 dscp = dsfield >> 2;
	u8 ecn = dsfield & 0x03;

	return __mlxsw_afa_block_append_qos_dsfield(block, true, dscp,
						    true, ecn, extack);
}
EXPORT_SYMBOL(mlxsw_afa_block_append_qos_dsfield);
/* Append a QoS action that sets only the DSCP to @dscp; ECN is not touched.
 *
 * Return: 0 on success, negative errno otherwise.
 */
int mlxsw_afa_block_append_qos_dscp(struct mlxsw_afa_block *block,
				    u8 dscp, struct netlink_ext_ack *extack)
{
	const bool set_dscp = true;
	const bool set_ecn = false;

	return __mlxsw_afa_block_append_qos_dsfield(block, set_dscp, dscp,
						    set_ecn, 0, extack);
}
EXPORT_SYMBOL(mlxsw_afa_block_append_qos_dscp);
/* Append a QoS action that sets only the ECN to @ecn; DSCP is not touched.
 *
 * Return: 0 on success, negative errno otherwise.
 */
int mlxsw_afa_block_append_qos_ecn(struct mlxsw_afa_block *block,
				   u8 ecn, struct netlink_ext_ack *extack)
{
	const bool set_dscp = false;
	const bool set_ecn = true;

	return __mlxsw_afa_block_append_qos_dsfield(block, set_dscp, 0,
						    set_ecn, ecn, extack);
}
EXPORT_SYMBOL(mlxsw_afa_block_append_qos_ecn);
int mlxsw_afa_block_append_qos_switch_prio(struct mlxsw_afa_block *block,
u8 prio,
struct netlink_ext_ack *extack)
@ -1285,7 +1405,7 @@ int mlxsw_afa_block_append_qos_switch_prio(struct mlxsw_afa_block *block,
NL_SET_ERR_MSG_MOD(extack, "Cannot append QOS action");
return PTR_ERR(act);
}
mlxsw_afa_qos_switch_prio_pack(act, MLXSW_AFA_QOS_CMD_SET,
mlxsw_afa_qos_switch_prio_pack(act, MLXSW_AFA_QOS_SWITCH_PRIO_CMD_SET,
prio);
return 0;
}

View File

@ -65,6 +65,13 @@ int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block,
int mlxsw_afa_block_append_qos_switch_prio(struct mlxsw_afa_block *block,
u8 prio,
struct netlink_ext_ack *extack);
/* Append a QoS action that sets the whole DS field: DSCP is taken from the
 * upper six bits of dsfield, ECN from the lower two.
 */
int mlxsw_afa_block_append_qos_dsfield(struct mlxsw_afa_block *block,
u8 dsfield,
struct netlink_ext_ack *extack);
/* Append a QoS action that sets only the DSCP. */
int mlxsw_afa_block_append_qos_dscp(struct mlxsw_afa_block *block,
u8 dscp, struct netlink_ext_ack *extack);
/* Append a QoS action that sets only the ECN. */
int mlxsw_afa_block_append_qos_ecn(struct mlxsw_afa_block *block,
u8 ecn, struct netlink_ext_ack *extack);
int mlxsw_afa_block_append_allocated_counter(struct mlxsw_afa_block *block,
u32 counter_index);
int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block,

View File

@ -749,6 +749,11 @@ int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp,
int mlxsw_sp_acl_rulei_act_priority(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_rule_info *rulei,
u32 prio, struct netlink_ext_ack *extack);
/* Add a packet-mangling action to the rule. The (htype, offset, mask) triple
 * selects the field to modify; combinations that are not recognized are
 * rejected with -EINVAL. val holds the new value at the field's position
 * within the 32-bit word.
 */
int mlxsw_sp_acl_rulei_act_mangle(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_rule_info *rulei,
enum flow_action_mangle_base htype,
u32 offset, u32 mask, u32 val,
struct netlink_ext_ack *extack);
int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_rule_info *rulei,
struct netlink_ext_ack *extack);

View File

@ -655,6 +655,97 @@ int mlxsw_sp_acl_rulei_act_priority(struct mlxsw_sp *mlxsw_sp,
extack);
}
/* Packet fields for which a mangle action can be translated to a QoS action. */
enum mlxsw_sp_acl_mangle_field {
	/* The whole DS field (DSCP and ECN together). */
	MLXSW_SP_ACL_MANGLE_FIELD_IP_DSFIELD,
	/* Only the DSCP part of the DS field. */
	MLXSW_SP_ACL_MANGLE_FIELD_IP_DSCP,
	/* Only the ECN part of the DS field. */
	MLXSW_SP_ACL_MANGLE_FIELD_IP_ECN,
};
/* Description of one supported mangle operation. A requested mangle is
 * supported when its header type, offset and mask all equal those of an entry.
 */
struct mlxsw_sp_acl_mangle_action {
	/* Header the offset is relative to. */
	enum flow_action_mangle_base htype;
	/* Offset is u32-aligned. */
	u32 offset;
	/* Mask bits are unset for the modified field. */
	u32 mask;
	/* Shift required to extract the set value. */
	u32 shift;
	/* Field that this entry stands for. */
	enum mlxsw_sp_acl_mangle_field field;
};
/* Initializer for one struct mlxsw_sp_acl_mangle_action. @_field is the
 * suffix of an MLXSW_SP_ACL_MANGLE_FIELD_* enumerator.
 */
#define MLXSW_SP_ACL_MANGLE_ACTION(_htype, _offset, _mask, _shift, _field) \
	{								\
		.htype = _htype,					\
		.offset = _offset,					\
		.mask = _mask,						\
		.shift = _shift,					\
		.field = MLXSW_SP_ACL_MANGLE_FIELD_##_field,		\
	}

/* Same as above, with the header type fixed to IPv4. */
#define MLXSW_SP_ACL_MANGLE_ACTION_IP4(_offset, _mask, _shift, _field) \
	MLXSW_SP_ACL_MANGLE_ACTION(FLOW_ACT_MANGLE_HDR_TYPE_IP4,	\
				   _offset, _mask, _shift, _field)

/* Same as above, with the header type fixed to IPv6. */
#define MLXSW_SP_ACL_MANGLE_ACTION_IP6(_offset, _mask, _shift, _field) \
	MLXSW_SP_ACL_MANGLE_ACTION(FLOW_ACT_MANGLE_HDR_TYPE_IP6,	\
				   _offset, _mask, _shift, _field)
/* Mangle operations that can be offloaded. Each entry lists the 32-bit word
 * offset within the IPv4 / IPv6 header, the mask whose cleared bits cover the
 * modified field, the shift that brings the new value down to bit 0, and the
 * field the entry stands for. The table is only ever read, hence const.
 */
static const struct mlxsw_sp_acl_mangle_action mlxsw_sp_acl_mangle_actions[] = {
	MLXSW_SP_ACL_MANGLE_ACTION_IP4(0, 0xff00ffff, 16, IP_DSFIELD),
	MLXSW_SP_ACL_MANGLE_ACTION_IP4(0, 0xff03ffff, 18, IP_DSCP),
	MLXSW_SP_ACL_MANGLE_ACTION_IP4(0, 0xfffcffff, 16, IP_ECN),
	MLXSW_SP_ACL_MANGLE_ACTION_IP6(0, 0xf00fffff, 20, IP_DSFIELD),
	MLXSW_SP_ACL_MANGLE_ACTION_IP6(0, 0xf03fffff, 22, IP_DSCP),
	MLXSW_SP_ACL_MANGLE_ACTION_IP6(0, 0xffcfffff, 20, IP_ECN),
};

/* Append the QoS action that implements the matched table entry.
 *
 * @mact: table entry that matched the requested mangle.
 * @val: new field value, already shifted down by mact->shift.
 *
 * Return: result of the corresponding mlxsw_afa_block_append_qos_*() helper,
 * or -EINVAL (with a one-time warning) for a field that has no handler.
 */
static int
mlxsw_sp_acl_rulei_act_mangle_field(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_acl_rule_info *rulei,
				    const struct mlxsw_sp_acl_mangle_action *mact,
				    u32 val, struct netlink_ext_ack *extack)
{
	switch (mact->field) {
	case MLXSW_SP_ACL_MANGLE_FIELD_IP_DSFIELD:
		return mlxsw_afa_block_append_qos_dsfield(rulei->act_block,
							  val, extack);
	case MLXSW_SP_ACL_MANGLE_FIELD_IP_DSCP:
		return mlxsw_afa_block_append_qos_dscp(rulei->act_block,
						       val, extack);
	case MLXSW_SP_ACL_MANGLE_FIELD_IP_ECN:
		return mlxsw_afa_block_append_qos_ecn(rulei->act_block,
						      val, extack);
	}

	/* We shouldn't have gotten a match in the first place! */
	WARN_ONCE(1, "Unhandled mangle field");
	return -EINVAL;
}

/* Translate a mangle request into an action of the rule.
 *
 * The request is looked up in mlxsw_sp_acl_mangle_actions by exact match of
 * header type, offset and mask. On a match, @val is shifted down so that the
 * new field value starts at bit 0 and the matching QoS action is appended.
 *
 * Return: 0 on success, -EINVAL (with @extack set) when the requested
 * combination is not in the table, or the error of the append helper.
 */
int mlxsw_sp_acl_rulei_act_mangle(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_acl_rule_info *rulei,
				  enum flow_action_mangle_base htype,
				  u32 offset, u32 mask, u32 val,
				  struct netlink_ext_ack *extack)
{
	const struct mlxsw_sp_acl_mangle_action *mact;
	size_t i;

	for (i = 0; i < ARRAY_SIZE(mlxsw_sp_acl_mangle_actions); ++i) {
		mact = &mlxsw_sp_acl_mangle_actions[i];
		if (mact->htype == htype &&
		    mact->offset == offset &&
		    mact->mask == mask) {
			val >>= mact->shift;
			return mlxsw_sp_acl_rulei_act_mangle_field(mlxsw_sp,
								   rulei, mact,
								   val, extack);
		}
	}

	NL_SET_ERR_MSG_MOD(extack, "Unsupported mangle field");
	return -EINVAL;
}
int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_rule_info *rulei,
struct netlink_ext_ack *extack)

View File

@ -158,6 +158,21 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
return mlxsw_sp_acl_rulei_act_priority(mlxsw_sp, rulei,
act->priority,
extack);
case FLOW_ACTION_MANGLE: {
enum flow_action_mangle_base htype = act->mangle.htype;
__be32 be_mask = (__force __be32) act->mangle.mask;
__be32 be_val = (__force __be32) act->mangle.val;
u32 offset = act->mangle.offset;
u32 mask = be32_to_cpu(be_mask);
u32 val = be32_to_cpu(be_val);
err = mlxsw_sp_acl_rulei_act_mangle(mlxsw_sp, rulei,
htype, offset,
mask, val, extack);
if (err)
return err;
break;
}
default:
NL_SET_ERR_MSG_MOD(extack, "Unsupported action");
dev_err(mlxsw_sp->bus_info->dev, "Unsupported action\n");

View File

@ -202,7 +202,8 @@ struct flow_action_entry {
__be16 proto;
u8 prio;
} vlan;
struct { /* FLOW_ACTION_PACKET_EDIT */
struct { /* FLOW_ACTION_MANGLE */
/* FLOW_ACTION_ADD */
enum flow_action_mangle_base htype;
u32 offset;
u32 mask;

View File

@ -31,6 +31,7 @@ ALL_TESTS="
ping_ipv4
test_update
test_no_update
test_pedit_norewrite
test_dscp_leftover
"
@ -56,6 +57,11 @@ zero()
echo 0
}
# Print the constant 3, ignoring any arguments.
three()
{
	printf '%s\n' 3
}
h1_create()
{
simple_if_init $h1 192.0.2.1/28
@ -103,6 +109,9 @@ switch_create()
simple_if_init $swp1 192.0.2.2/28
__simple_if_init $swp2 v$swp1 192.0.2.17/28
tc qdisc add dev $swp1 clsact
tc qdisc add dev $swp2 clsact
lldptool -T -i $swp1 -V APP $(dscp_map 0) >/dev/null
lldptool -T -i $swp2 -V APP $(dscp_map 0) >/dev/null
lldpad_app_wait_set $swp1
@ -115,6 +124,9 @@ switch_destroy()
lldptool -T -i $swp1 -V APP -d $(dscp_map 0) >/dev/null
lldpad_app_wait_del
tc qdisc del dev $swp2 clsact
tc qdisc del dev $swp1 clsact
__simple_if_fini $swp2 192.0.2.17/28
simple_if_fini $swp1 192.0.2.2/28
}
@ -223,18 +235,36 @@ __test_update()
# Run the update test with net.ipv4.ip_forward_update_priority set to 1.
test_update()
{
	printf '%s\n' "Test net.ipv4.ip_forward_update_priority=1"
	__test_update 1 reprioritize
}
# Run the update test with net.ipv4.ip_forward_update_priority set to 0.
test_no_update()
{
	printf '%s\n' "Test net.ipv4.ip_forward_update_priority=0"
	__test_update 0 echo
}
# Check that once pedit has set the DSCP of a packet, the DSCP rewrite is not
# applied to that packet anymore.
test_pedit_norewrite()
{
	local dsfield=$((3 << 2))

	echo "Test no DSCP rewrite after DSCP is updated by pedit"

	# Set DSCP 3 (ECN retained) and priority 3 on everything entering $swp1.
	tc filter add dev $swp1 ingress handle 101 pref 1 prot ip flower \
		action pedit ex munge ip dsfield set $dsfield retain 0xfc \
		action skbedit priority 3

	__test_update 0 three

	tc filter del dev $swp1 ingress pref 1
}
# Test that when the last APP rule is removed, the prio->DSCP map is properly
# set to zeroes, and that the last APP rule does not stay active in the ASIC.
test_dscp_leftover()
{
echo "Test that last removed DSCP rule is deconfigured correctly"
lldptool -T -i $swp2 -V APP -d $(dscp_map 0) >/dev/null
lldpad_app_wait_del

View File

@ -0,0 +1,238 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on
# egress of $swp2, the traffic is acted upon by a pedit action. An ingress
# filter installed on $h2 verifies that the packet looks like expected.
#
# +----------------------+ +----------------------+
# | H1 | | H2 |
# | + $h1 | | $h2 + |
# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
# +----|-----------------+ +----------------|-----+
# | |
# +----|----------------------------------------------------------------|-----+
# | SW | | |
# | +-|----------------------------------------------------------------|-+ |
# | | + $swp1 BR $swp2 + | |
# | +--------------------------------------------------------------------+ |
# +---------------------------------------------------------------------------+
# Names of the test functions of this script.
# NOTE(review): presumably consumed by tests_run from the sourced library.
ALL_TESTS="
ping_ipv4
test_ip_dsfield
test_ip_dscp
test_ip_ecn
test_ip_dscp_ecn
"
# Four interfaces are used: h1, swp1, swp2 and h2 (see setup_prepare).
NUM_NETIFS=4
source lib.sh
source tc_common.sh
# Default HIT_TIMEOUT to 2000 unless it is already set and non-empty.
: ${HIT_TIMEOUT:=2000} # ms
# Bring up $h1 with its IPv4 and IPv6 addresses.
h1_create()
{
	local -a addrs=(192.0.2.1/28 2001:db8:1::1/64)

	simple_if_init $h1 "${addrs[@]}"
}
# Undo h1_create.
h1_destroy()
{
	local -a addrs=(192.0.2.1/28 2001:db8:1::1/64)

	simple_if_fini $h1 "${addrs[@]}"
}
# Bring up $h2 with its addresses and attach a clsact qdisc to it, so that
# filters can be installed on its ingress.
h2_create()
{
	local -a addrs=(192.0.2.2/28 2001:db8:1::2/64)

	simple_if_init $h2 "${addrs[@]}"
	tc qdisc add dev $h2 clsact
}
# Undo h2_create, in reverse order.
h2_destroy()
{
	local -a addrs=(192.0.2.2/28 2001:db8:1::2/64)

	tc qdisc del dev $h2 clsact
	simple_if_fini $h2 "${addrs[@]}"
}
# Create the VLAN-aware bridge br1, enslave both switch ports to it and give
# each port a clsact qdisc.
switch_create()
{
	ip link add name br1 up type bridge vlan_filtering 1

	# First port.
	ip link set dev $swp1 master br1
	ip link set dev $swp1 up

	# Second port.
	ip link set dev $swp2 master br1
	ip link set dev $swp2 up

	tc qdisc add dev $swp1 clsact
	tc qdisc add dev $swp2 clsact
}
# Undo switch_create, in reverse order.
switch_destroy()
{
	# Drop the qdiscs.
	tc qdisc del dev $swp2 clsact
	tc qdisc del dev $swp1 clsact

	# Release the ports from the bridge, then remove the bridge itself.
	ip link set dev $swp2 nomaster
	ip link set dev $swp1 nomaster
	ip link del dev br1
}
# Map the NETIFS entries to their roles, remember the MAC address of $h2 and
# build the topology.
setup_prepare()
{
	# p1 and p4 are the host ends, p2 and p3 the switch ports.
	h1=${NETIFS[p1]}
	swp1=${NETIFS[p2]}

	swp2=${NETIFS[p3]}
	h2=${NETIFS[p4]}

	h2mac=$(mac_get $h2)

	vrf_prepare

	h1_create
	h2_create
	switch_create
}
# Tear the topology down in the reverse order of setup_prepare.
cleanup()
{
	pre_cleanup

	switch_destroy
	h2_destroy
	h1_destroy

	vrf_cleanup
}
# Ping the IPv4 address of $h2 from $h1.
ping_ipv4()
{
	local h2_addr=192.0.2.2

	ping_test $h1 $h2_addr
}
# Send ten TCP packets with TOS 0x7d from $h1 to the MAC address of $h2 and
# check that the filter with handle 101 on $h2 ingress has seen at least ten
# packets. The result is logged through log_test.
#
# $1 - where the pedit rule is installed (used in the log message)
# $2 - description of the pedit action (used in the log message)
# $3 - extra flags for mausezahn
do_test_pedit_dsfield_common()
{
	local pedit_locus=$1; shift
	local pedit_action=$1; shift
	local mz_flags=$1; shift

	RET=0

	# TOS 125: DSCP 31, ECN 1. Used for testing that the relevant part is
	# overwritten when zero is selected.
	$MZ $mz_flags $h1 -c 10 -d 20msec -p 100 \
		-a own -b $h2mac -q -t tcp tos=0x7d,sp=54321,dp=12345

	# Wait with the timeout that this script configures (HIT_TIMEOUT, in
	# ms) rather than a variable the script never sets.
	local pkts
	pkts=$(busywait "$HIT_TIMEOUT" until_counter_is ">= 10" \
			tc_rule_handle_stats_get "dev $h2 ingress" 101)
	check_err $? "Expected to get 10 packets, but got $pkts."

	log_test "$pedit_locus pedit $pedit_action"
}
# Install a pedit rule and a matching rule, run the traffic test, and remove
# both rules again.
#
# $1 - where to install the pedit rule, e.g. "dev <port> ingress"
# $2 - pedit munge arguments
# $3 - protocol of the verifying filter on $h2
# $4 - flower match of the verifying filter on $h2
# $5 - extra flags for mausezahn
#
# Any further arguments are ignored.
do_test_pedit_dsfield()
{
	local pedit_locus=$1; shift
	local pedit_action=$1; shift
	local match_prot=$1; shift
	local match_flower=$1; shift
	local mz_flags=$1; shift

	tc filter add $pedit_locus handle 101 pref 1 \
		flower action pedit ex munge $pedit_action
	# The verifying filter is software-only (skip_hw).
	tc filter add dev $h2 ingress handle 101 pref 1 prot $match_prot \
		flower skip_hw $match_flower action pass

	do_test_pedit_dsfield_common "$pedit_locus" "$pedit_action" "$mz_flags"

	tc filter del dev $h2 ingress pref 1
	tc filter del $pedit_locus pref 1
}
# Rewrite the whole DS field to each of a set of values and check that the
# packets arrive at $h2 with exactly that TOS.
#
# $1 - where to install the pedit rule
do_test_ip_dsfield()
{
	local locus=$1; shift
	local -a values=(0 1 2 3 128 252 253 254 255)
	local dsfield

	for dsfield in "${values[@]}"; do
		do_test_pedit_dsfield "$locus" \
				      "ip dsfield set $dsfield" \
				      ip "ip_tos $dsfield" \
				      "-A 192.0.2.1 -B 192.0.2.2"
	done
}
# Run the DS field test with the pedit rule on $swp1 ingress, then on $swp2
# egress.
test_ip_dsfield()
{
	local locus

	for locus in "dev $swp1 ingress" "dev $swp2 egress"; do
		do_test_ip_dsfield "$locus"
	done
}
# Rewrite only the DSCP to each of a set of values. The packets are sent with
# ECN 1, so the expected TOS is the new DSCP combined with ECN 1.
#
# $1 - where to install the pedit rule
do_test_ip_dscp()
{
	local locus=$1; shift
	local -a values=(0 1 2 3 32 61 62 63)
	local dscp
	local shifted

	for dscp in "${values[@]}"; do
		shifted=$((dscp << 2))
		do_test_pedit_dsfield "$locus" \
				      "ip dsfield set $shifted retain 0xfc" \
				      ip "ip_tos $((shifted | 1))" \
				      "-A 192.0.2.1 -B 192.0.2.2"
	done
}
# Run the DSCP test with the pedit rule on $swp1 ingress, then on $swp2
# egress.
test_ip_dscp()
{
	local locus

	for locus in "dev $swp1 ingress" "dev $swp2 egress"; do
		do_test_ip_dscp "$locus"
	done
}
# Rewrite only the ECN to each possible value. The packets are sent with
# DSCP 31 (124 in TOS terms), so the expected TOS is 124 combined with the new
# ECN.
#
# $1 - where to install the pedit rule
do_test_ip_ecn()
{
	local locus=$1; shift
	local ecn
	local expect

	for ecn in 0 1 2 3; do
		expect=$((124 | ecn))
		do_test_pedit_dsfield "$locus" \
				      "ip dsfield set $ecn retain 0x03" \
				      ip "ip_tos $expect" \
				      "-A 192.0.2.1 -B 192.0.2.2"
	done
}
# Run the ECN test with the pedit rule on $swp1 ingress, then on $swp2 egress.
test_ip_ecn()
{
	local locus

	for locus in "dev $swp1 ingress" "dev $swp2 egress"; do
		do_test_ip_ecn "$locus"
	done
}
# Install one filter with two pedit actions, one setting the DSCP part and one
# setting the ECN part, and check that packets arrive at $h2 with TOS 125.
#
# $1 - where to install the pedit rule
do_test_ip_dscp_ecn()
{
	local locus=$1; shift

	# First action: DSCP bits to 124, ECN retained.
	# Second action: ECN bits to 1, DSCP retained.
	tc filter add $locus handle 101 pref 1 \
		flower action pedit ex munge ip dsfield set 124 retain 0xfc \
		action pedit ex munge ip dsfield set 1 retain 0x03

	# Software-only filter that counts the packets with the expected TOS.
	tc filter add dev $h2 ingress handle 101 pref 1 prot ip \
		flower skip_hw ip_tos 125 action pass

	do_test_pedit_dsfield_common "$locus" "set DSCP + set ECN" \
				     "-A 192.0.2.1 -B 192.0.2.2"

	tc filter del dev $h2 ingress pref 1
	tc filter del $locus pref 1
}
# Run the combined DSCP + ECN test with the pedit rule on $swp1 ingress, then
# on $swp2 egress.
test_ip_dscp_ecn()
{
	local locus

	for locus in "dev $swp1 ingress" "dev $swp2 egress"; do
		do_test_ip_dscp_ecn "$locus"
	done
}
# Run cleanup whenever the script exits.
trap cleanup EXIT

# Build the topology and wait for it to become ready.
setup_prepare
setup_wait

tests_run

exit $EXIT_STATUS