[bionic:linux-azure-4.15][PATCH 1/2] hv_netvsc: record hardware hash in skb
Marcelo Henrique Cerri
marcelo.cerri at canonical.com
Mon Dec 14 20:17:09 UTC 2020
From: Dexuan Cui <decui at microsoft.com>
BugLink: https://bugs.launchpad.net/bugs/1902531
Since the RSS hash is available from the host, record it in
the skb.
Signed-off-by: Stephen Hemminger <sthemmin at microsoft.com>
Signed-off-by: David S. Miller <davem at davemloft.net>
This is a backport of the below commit in the mainline:
commit 1fac7ca4e63b ("hv_netvsc: record hardware hash in skb")
netvsc_start_xmit() only adds the NBL_HASH_VALUE RNDIS PPI field (which
is used by the host NetVSP driver as a hint to spread the TX-Completion
events of the physical NIC to different host CPUs) if skb->hash != 0.
Typically Linux sets skb->hash for every outgoing TCP/UDP skb in
netvsc_select_queue() -> netvsc_pick_tx() -> netvsc_get_tx_queue() ->
netvsc_get_hash() -> skb_get_hash() -> __skb_get_hash() ->
__skb_set_sw_hash(), but when a Linux VM works in IP-forwarding mode,
skb->hash currently always has the default value zero because
netvsc_select_queue() -> netvsc_pick_tx() -> skb_get_rx_queue() doesn't
set skb->hash, so the host-side TX-Completion events almost always
happen on a single specific host CPU, which can easily be overloaded
when the packet rate is high. Fix the issue by using the RX hash value
as the TX hash value.
Cc: Haiyang Zhang <haiyangz at microsoft.com>
Signed-off-by: Dexuan Cui <decui at microsoft.com>
Signed-off-by: Marcelo Henrique Cerri <marcelo.cerri at canonical.com>
---
drivers/net/hyperv/hyperv_net.h | 3 ++-
drivers/net/hyperv/netvsc_drv.c | 11 ++++++++---
drivers/net/hyperv/rndis_filter.c | 4 +++-
3 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 83e040359037..58ba775d8dd2 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -205,7 +205,8 @@ int netvsc_recv_callback(struct net_device *net,
struct vmbus_channel *channel,
void *data, u32 len,
const struct ndis_tcp_ip_checksum_info *csum_info,
- const struct ndis_pkt_8021q_info *vlan);
+ const struct ndis_pkt_8021q_info *vlan,
+ const u32 *hash_info);
void netvsc_channel_cb(void *context);
int netvsc_poll(struct napi_struct *napi, int budget);
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 8f81f5e08581..391d67feedaa 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -780,7 +780,8 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
struct napi_struct *napi,
const struct ndis_tcp_ip_checksum_info *csum_info,
const struct ndis_pkt_8021q_info *vlan,
- void *data, u32 buflen)
+ void *data, u32 buflen,
+ const u32 *hash_info)
{
struct sk_buff *skb;
@@ -816,6 +817,9 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
+ if (hash_info)
+ skb_set_hash(skb, *hash_info, PKT_HASH_TYPE_L4);
+
if (vlan) {
u16 vlan_tci = vlan->vlanid | (vlan->pri << VLAN_PRIO_SHIFT);
@@ -834,7 +838,8 @@ int netvsc_recv_callback(struct net_device *net,
struct vmbus_channel *channel,
void *data, u32 len,
const struct ndis_tcp_ip_checksum_info *csum_info,
- const struct ndis_pkt_8021q_info *vlan)
+ const struct ndis_pkt_8021q_info *vlan,
+ const u32 *hash_info)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
struct netvsc_device *net_device;
@@ -855,7 +860,7 @@ int netvsc_recv_callback(struct net_device *net,
/* Allocate a skb - TODO direct I/O to pages? */
skb = netvsc_alloc_recv_skb(net, &nvchan->napi,
- csum_info, vlan, data, len);
+ csum_info, vlan, data, len, hash_info);
if (unlikely(!skb)) {
drop:
++net->stats.rx_dropped;
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 29e8741e1891..02b784efa1c8 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -369,6 +369,7 @@ static int rndis_filter_receive_data(struct net_device *ndev,
struct rndis_packet *rndis_pkt = &msg->msg.pkt;
const struct ndis_tcp_ip_checksum_info *csum_info;
const struct ndis_pkt_8021q_info *vlan;
+ const u32 *hash_info;
u32 data_offset;
/* Remove the rndis header and pass it back up the stack */
@@ -397,9 +398,10 @@ static int rndis_filter_receive_data(struct net_device *ndev,
*/
data = (void *)((unsigned long)data + data_offset);
csum_info = rndis_get_ppi(rndis_pkt, TCPIP_CHKSUM_PKTINFO);
+ hash_info = rndis_get_ppi(rndis_pkt, NBL_HASH_VALUE);
return netvsc_recv_callback(ndev, channel,
data, rndis_pkt->data_len,
- csum_info, vlan);
+ csum_info, vlan, hash_info);
}
int rndis_filter_receive(struct net_device *ndev,
--
2.25.1
More information about the kernel-team
mailing list