[Acked] [Xenial SRU] Fix performance regression in tunneled connections
Andy Whitcroft
apw at canonical.com
Fri Jul 29 15:57:38 UTC 2016
On Mon, Jul 25, 2016 at 04:30:00PM +0200, Stefan Bader wrote:
> It was mentioned that we had this fix for yakkety (though I guess that
> means it is in the 4.6+ kernel, which may come soon). This is the backport
> for Xenial which was tested by the patch author.
>
> -Stefan
>
>
> From b311341e0bd9aca6d1a7303b5a8da9bebee64338 Mon Sep 17 00:00:00 2001
> From: Jesse Gross <jesse at kernel.org>
> Date: Sat, 19 Mar 2016 09:32:02 -0700
> Subject: [PATCH] tunnels: Remove encapsulation offloads on decap.
>
> If a packet is either locally encapsulated or processed through GRO
> it is marked with the offloads that it requires. However, when it is
> decapsulated these tunnel offload indications are not removed. This
> means that if we receive an encapsulated TCP packet, aggregate it with
> GRO, decapsulate, and retransmit the resulting frame on a NIC that does
> not support encapsulation, we won't be able to take advantage of hardware
> offloads even though it is just a simple TCP packet at this point.
>
> This fixes the problem by stripping off encapsulation offload indications
> when packets are decapsulated.
>
> The performance impacts of this bug are significant. In a test where a
> Geneve encapsulated TCP stream is sent to a hypervisor, GRO'ed, decapsulated,
> and bridged to a VM, performance is improved by 60% (5Gbps->8Gbps) as a
> result of avoiding unnecessary segmentation at the VM tap interface.
>
> Reported-by: Ramu Ramamurthy <sramamur at linux.vnet.ibm.com>
> Fixes: 68c33163 ("v4 GRE: Add TCP segmentation offload for GRE")
> Signed-off-by: Jesse Gross <jesse at kernel.org>
> Signed-off-by: David S. Miller <davem at davemloft.net>
>
> BugLink: http://bugs.launchpad.net/bugs/1602755
>
> (backported from commit a09a4c8dd1ec7f830e1fb9e59eb72bddc965d168)
> [adapt iptunnel_pull_header arguments, avoid 7f290c9]
> Signed-off-by: Stefan Bader <stefan.bader at canonical.com>
> ---
> include/net/ip_tunnels.h | 16 ++++++++++++++++
> net/ipv4/fou.c | 13 +++++++++++--
> net/ipv4/ip_tunnel_core.c | 3 ++-
> net/ipv6/sit.c | 5 +++--
> 4 files changed, 32 insertions(+), 5 deletions(-)
>
> diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
> index 28a38e5..9e55c15 100644
> --- a/include/net/ip_tunnels.h
> +++ b/include/net/ip_tunnels.h
> @@ -310,6 +310,22 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
> struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, bool gre_csum,
> int gso_type_mask);
>
> +static inline int iptunnel_pull_offloads(struct sk_buff *skb)
> +{
> + if (skb_is_gso(skb)) {
> + int err;
> +
> + err = skb_unclone(skb, GFP_ATOMIC);
> + if (unlikely(err))
> + return err;
> + skb_shinfo(skb)->gso_type &= ~(NETIF_F_GSO_ENCAP_ALL >>
> + NETIF_F_GSO_SHIFT);
> + }
> +
> + skb->encapsulation = 0;
> + return 0;
> +}
> +
> static inline void iptunnel_xmit_stats(int err,
> struct net_device_stats *err_stats,
> struct pcpu_sw_netstats __percpu *stats)
> diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
> index bd903fe..08d7de5 100644
> --- a/net/ipv4/fou.c
> +++ b/net/ipv4/fou.c
> @@ -48,7 +48,7 @@ static inline struct fou *fou_from_sock(struct sock *sk)
> return sk->sk_user_data;
> }
>
> -static void fou_recv_pull(struct sk_buff *skb, size_t len)
> +static int fou_recv_pull(struct sk_buff *skb, size_t len)
> {
> struct iphdr *iph = ip_hdr(skb);
>
> @@ -59,6 +59,7 @@ static void fou_recv_pull(struct sk_buff *skb, size_t len)
> __skb_pull(skb, len);
> skb_postpull_rcsum(skb, udp_hdr(skb), len);
> skb_reset_transport_header(skb);
> + return iptunnel_pull_offloads(skb);
> }
>
> static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
> @@ -68,9 +69,14 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
> if (!fou)
> return 1;
>
> - fou_recv_pull(skb, sizeof(struct udphdr));
> + if (fou_recv_pull(skb, sizeof(struct udphdr)))
> + goto drop;
>
> return -fou->protocol;
> +
> +drop:
> + kfree_skb(skb);
> + return 0;
> }
>
> static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
> @@ -170,6 +176,9 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
> __skb_pull(skb, sizeof(struct udphdr) + hdrlen);
> skb_reset_transport_header(skb);
>
> + if (iptunnel_pull_offloads(skb))
> + goto drop;
> +
> return -guehdr->proto_ctype;
>
> drop:
> diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
> index 6cb9009..dbda056 100644
> --- a/net/ipv4/ip_tunnel_core.c
> +++ b/net/ipv4/ip_tunnel_core.c
> @@ -116,7 +116,8 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
> skb->vlan_tci = 0;
> skb_set_queue_mapping(skb, 0);
> skb->pkt_type = PACKET_HOST;
> - return 0;
> +
> + return iptunnel_pull_offloads(skb);
> }
> EXPORT_SYMBOL_GPL(iptunnel_pull_header);
>
> diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
> index dcccae8..e088f0e 100644
> --- a/net/ipv6/sit.c
> +++ b/net/ipv6/sit.c
> @@ -681,14 +681,15 @@ static int ipip6_rcv(struct sk_buff *skb)
> skb->mac_header = skb->network_header;
> skb_reset_network_header(skb);
> IPCB(skb)->flags = 0;
> - skb->protocol = htons(ETH_P_IPV6);
> + skb->dev = tunnel->dev;
>
> if (packet_is_spoofed(skb, iph, tunnel)) {
> tunnel->dev->stats.rx_errors++;
> goto out;
> }
>
> - __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
> + if (iptunnel_pull_header(skb, 0, htons(ETH_P_IPV6)))
> + goto out;
>
> err = IP_ECN_decapsulate(iph, skb);
> if (unlikely(err)) {
> --
Seems to do what is claimed.
Acked-by: Andy Whitcroft <apw at canonical.com>
-apw
More information about the kernel-team
mailing list