[bionic:linux-azure-4.15][PATCH 1/2] hv_netvsc: record hardware hash in skb

Marcelo Henrique Cerri marcelo.cerri at canonical.com
Mon Dec 14 20:17:09 UTC 2020


From: Dexuan Cui <decui at microsoft.com>

BugLink: https://bugs.launchpad.net/bugs/1902531

Since the RSS hash is available from the host, record it in
the skb.

Signed-off-by: Stephen Hemminger <sthemmin at microsoft.com>
Signed-off-by: David S. Miller <davem at davemloft.net>

This is a backport of the following mainline commit:
commit 1fac7ca4e63b ("hv_netvsc: record hardware hash in skb")

netvsc_start_xmit() only adds the NBL_HASH_VALUE RNDIS PPI field (which
is used by the host NetVSP driver as a hint to spread the TX-Completion
events of the physical NIC to different host CPUs) if skb->hash != 0.

Typically Linux sets skb->hash for every outgoing TCP/UDP skb in
netvsc_select_queue() -> netvsc_pick_tx() -> netvsc_get_tx_queue() ->
netvsc_get_hash() -> skb_get_hash() -> __skb_get_hash() ->
__skb_set_sw_hash(), but when a Linux VM works in IP-forwarding mode,
skb->hash currently always has the default value zero because
netvsc_select_queue() -> netvsc_pick_tx() -> skb_get_rx_queue() doesn't
set skb->hash, so the host-side TX-Completion events almost always
happen on one specific host CPU, which can easily be overloaded
when the packet rate is high. Fix the issue by using the RX hash value
as the TX hash value.

Cc: Haiyang Zhang <haiyangz at microsoft.com>
Signed-off-by: Dexuan Cui <decui at microsoft.com>
Signed-off-by: Marcelo Henrique Cerri <marcelo.cerri at canonical.com>
---
 drivers/net/hyperv/hyperv_net.h   |  3 ++-
 drivers/net/hyperv/netvsc_drv.c   | 11 ++++++++---
 drivers/net/hyperv/rndis_filter.c |  4 +++-
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 83e040359037..58ba775d8dd2 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -205,7 +205,8 @@ int netvsc_recv_callback(struct net_device *net,
 			 struct vmbus_channel *channel,
 			 void  *data, u32 len,
 			 const struct ndis_tcp_ip_checksum_info *csum_info,
-			 const struct ndis_pkt_8021q_info *vlan);
+			 const struct ndis_pkt_8021q_info *vlan,
+			 const u32 *hash_info);
 void netvsc_channel_cb(void *context);
 int netvsc_poll(struct napi_struct *napi, int budget);
 
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 8f81f5e08581..391d67feedaa 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -780,7 +780,8 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
 					     struct napi_struct *napi,
 					     const struct ndis_tcp_ip_checksum_info *csum_info,
 					     const struct ndis_pkt_8021q_info *vlan,
-					     void *data, u32 buflen)
+					     void *data, u32 buflen,
+					     const u32 *hash_info)
 {
 	struct sk_buff *skb;
 
@@ -816,6 +817,9 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 
+	if (hash_info)
+		skb_set_hash(skb, *hash_info, PKT_HASH_TYPE_L4);
+
 	if (vlan) {
 		u16 vlan_tci = vlan->vlanid | (vlan->pri << VLAN_PRIO_SHIFT);
 
@@ -834,7 +838,8 @@ int netvsc_recv_callback(struct net_device *net,
 			 struct vmbus_channel *channel,
 			 void  *data, u32 len,
 			 const struct ndis_tcp_ip_checksum_info *csum_info,
-			 const struct ndis_pkt_8021q_info *vlan)
+			 const struct ndis_pkt_8021q_info *vlan,
+			 const u32 *hash_info)
 {
 	struct net_device_context *net_device_ctx = netdev_priv(net);
 	struct netvsc_device *net_device;
@@ -855,7 +860,7 @@ int netvsc_recv_callback(struct net_device *net,
 
 	/* Allocate a skb - TODO direct I/O to pages? */
 	skb = netvsc_alloc_recv_skb(net, &nvchan->napi,
-				    csum_info, vlan, data, len);
+				    csum_info, vlan, data, len, hash_info);
 	if (unlikely(!skb)) {
 drop:
 		++net->stats.rx_dropped;
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 29e8741e1891..02b784efa1c8 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -369,6 +369,7 @@ static int rndis_filter_receive_data(struct net_device *ndev,
 	struct rndis_packet *rndis_pkt = &msg->msg.pkt;
 	const struct ndis_tcp_ip_checksum_info *csum_info;
 	const struct ndis_pkt_8021q_info *vlan;
+	const u32 *hash_info;
 	u32 data_offset;
 
 	/* Remove the rndis header and pass it back up the stack */
@@ -397,9 +398,10 @@ static int rndis_filter_receive_data(struct net_device *ndev,
 	 */
 	data = (void *)((unsigned long)data + data_offset);
 	csum_info = rndis_get_ppi(rndis_pkt, TCPIP_CHKSUM_PKTINFO);
+	hash_info = rndis_get_ppi(rndis_pkt, NBL_HASH_VALUE);
 	return netvsc_recv_callback(ndev, channel,
 				    data, rndis_pkt->data_len,
-				    csum_info, vlan);
+				    csum_info, vlan, hash_info);
 }
 
 int rndis_filter_receive(struct net_device *ndev,
-- 
2.25.1




More information about the kernel-team mailing list