[Acked] [Xenial SRU] Fix performance regression in tunneled connections

Andy Whitcroft apw at canonical.com
Fri Jul 29 15:57:38 UTC 2016


On Mon, Jul 25, 2016 at 04:30:00PM +0200, Stefan Bader wrote:
> It was mentioned that we had this fix for yakkety (though I guess that
> means in the 4.6+ kernel which may come soon). This is the backport
> for Xenial which was tested by the patch author.
> 
> -Stefan
> 
> 
> From b311341e0bd9aca6d1a7303b5a8da9bebee64338 Mon Sep 17 00:00:00 2001
> From: Jesse Gross <jesse at kernel.org>
> Date: Sat, 19 Mar 2016 09:32:02 -0700
> Subject: [PATCH] tunnels: Remove encapsulation offloads on decap.
> 
> If a packet is either locally encapsulated or processed through GRO
> it is marked with the offloads that it requires. However, when it is
> decapsulated these tunnel offload indications are not removed. This
> means that if we receive an encapsulated TCP packet, aggregate it with
> GRO, decapsulate, and retransmit the resulting frame on a NIC that does
> not support encapsulation, we won't be able to take advantage of hardware
> offloads even though it is just a simple TCP packet at this point.
> 
> This fixes the problem by stripping off encapsulation offload indications
> when packets are decapsulated.
> 
> The performance impacts of this bug are significant. In a test where a
> Geneve encapsulated TCP stream is sent to a hypervisor, GRO'ed, decapsulated,
> and bridged to a VM, performance is improved by 60% (5Gbps->8Gbps) as a
> result of avoiding unnecessary segmentation at the VM tap interface.
> 
> Reported-by: Ramu Ramamurthy <sramamur at linux.vnet.ibm.com>
> Fixes: 68c33163 ("v4 GRE: Add TCP segmentation offload for GRE")
> Signed-off-by: Jesse Gross <jesse at kernel.org>
> Signed-off-by: David S. Miller <davem at davemloft.net>
> 
> BugLink: http://bugs.launchpad.net/bugs/1602755
> 
> (backported from commit a09a4c8dd1ec7f830e1fb9e59eb72bddc965d168)
> [adapt iptunnel_pull_header arguments, avoid 7f290c9]
> Signed-off-by: Stefan Bader <stefan.bader at canonical.com>
> ---
>  include/net/ip_tunnels.h  | 16 ++++++++++++++++
>  net/ipv4/fou.c            | 13 +++++++++++--
>  net/ipv4/ip_tunnel_core.c |  3 ++-
>  net/ipv6/sit.c            |  5 +++--
>  4 files changed, 32 insertions(+), 5 deletions(-)
> 
> diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
> index 28a38e5..9e55c15 100644
> --- a/include/net/ip_tunnels.h
> +++ b/include/net/ip_tunnels.h
> @@ -310,6 +310,22 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
>  struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, bool gre_csum,
>  					 int gso_type_mask);
>  
> +static inline int iptunnel_pull_offloads(struct sk_buff *skb)
> +{
> +	if (skb_is_gso(skb)) {
> +		int err;
> +
> +		err = skb_unclone(skb, GFP_ATOMIC);
> +		if (unlikely(err))
> +			return err;
> +		skb_shinfo(skb)->gso_type &= ~(NETIF_F_GSO_ENCAP_ALL >>
> +					       NETIF_F_GSO_SHIFT);
> +	}
> +
> +	skb->encapsulation = 0;
> +	return 0;
> +}
> +
>  static inline void iptunnel_xmit_stats(int err,
>  				       struct net_device_stats *err_stats,
>  				       struct pcpu_sw_netstats __percpu *stats)
> diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
> index bd903fe..08d7de5 100644
> --- a/net/ipv4/fou.c
> +++ b/net/ipv4/fou.c
> @@ -48,7 +48,7 @@ static inline struct fou *fou_from_sock(struct sock *sk)
>  	return sk->sk_user_data;
>  }
>  
> -static void fou_recv_pull(struct sk_buff *skb, size_t len)
> +static int fou_recv_pull(struct sk_buff *skb, size_t len)
>  {
>  	struct iphdr *iph = ip_hdr(skb);
>  
> @@ -59,6 +59,7 @@ static void fou_recv_pull(struct sk_buff *skb, size_t len)
>  	__skb_pull(skb, len);
>  	skb_postpull_rcsum(skb, udp_hdr(skb), len);
>  	skb_reset_transport_header(skb);
> +	return iptunnel_pull_offloads(skb);
>  }
>  
>  static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
> @@ -68,9 +69,14 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
>  	if (!fou)
>  		return 1;
>  
> -	fou_recv_pull(skb, sizeof(struct udphdr));
> +	if (fou_recv_pull(skb, sizeof(struct udphdr)))
> +		goto drop;
>  
>  	return -fou->protocol;
> +
> +drop:
> +	kfree_skb(skb);
> +	return 0;
>  }
>  
>  static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
> @@ -170,6 +176,9 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
>  	__skb_pull(skb, sizeof(struct udphdr) + hdrlen);
>  	skb_reset_transport_header(skb);
>  
> +	if (iptunnel_pull_offloads(skb))
> +		goto drop;
> +
>  	return -guehdr->proto_ctype;
>  
>  drop:
> diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
> index 6cb9009..dbda056 100644
> --- a/net/ipv4/ip_tunnel_core.c
> +++ b/net/ipv4/ip_tunnel_core.c
> @@ -116,7 +116,8 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
>  	skb->vlan_tci = 0;
>  	skb_set_queue_mapping(skb, 0);
>  	skb->pkt_type = PACKET_HOST;
> -	return 0;
> +
> +	return iptunnel_pull_offloads(skb);
>  }
>  EXPORT_SYMBOL_GPL(iptunnel_pull_header);
>  
> diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
> index dcccae8..e088f0e 100644
> --- a/net/ipv6/sit.c
> +++ b/net/ipv6/sit.c
> @@ -681,14 +681,15 @@ static int ipip6_rcv(struct sk_buff *skb)
>  		skb->mac_header = skb->network_header;
>  		skb_reset_network_header(skb);
>  		IPCB(skb)->flags = 0;
> -		skb->protocol = htons(ETH_P_IPV6);
> +		skb->dev = tunnel->dev;
>  
>  		if (packet_is_spoofed(skb, iph, tunnel)) {
>  			tunnel->dev->stats.rx_errors++;
>  			goto out;
>  		}
>  
> -		__skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
> +		if (iptunnel_pull_header(skb, 0, htons(ETH_P_IPV6)))
> +			goto out;
>  
>  		err = IP_ECN_decapsulate(iph, skb);
>  		if (unlikely(err)) {
> -- 

Seems to do what is claimed.

Acked-by: Andy Whitcroft <apw at canonical.com>

-apw




More information about the kernel-team mailing list